00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #include "autoconf.h"
00044 #include "config.h"
00045
00046 #include <limits.h>
00047 #include <string.h>
00048 #include <ctype.h>
00049 #include <stdlib.h>
00050 #include <stddef.h>
00051 #include "pcre.h"
00052
00053 #include "externs.h"
00054 #include "timeutil.h"
00055
00056
/* Build-time constants used by the macros and tables further down. */
00057 #define LINK_SIZE 2 /* size in bytes of a link field; PUTINC advances its pointer by this amount */
00058 #define MATCH_LIMIT 100000 /* NOTE(review): presumably the default cap on matcher calls; not referenced in the visible part of the file */
00059 #define NEWLINE '\n' /* the value ESC_n expands to */
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
/*
 * Accessors for the link fields stored inside a compiled pattern.
 *
 * A link is held as two bytes, most significant byte first, at a[n] and
 * a[n+1].  PUT stores the low 16 bits of d; GET reads them back.  PUTINC
 * stores and then advances the pointer a by LINK_SIZE.
 *
 * Every argument and every expansion is parenthesised so the macros behave
 * as single expressions whatever is passed in (e.g. PUT(p + 1, 0, v)).
 * Arguments are still evaluated more than once, so do not pass expressions
 * with side effects.
 */
#define PUT(a,n,d) \
  (((a)[(n)] = (d) >> 8), \
   ((a)[(n)+1] = (d) & 255))

#define GET(a,n) \
  (((a)[(n)] << 8) | (a)[(n)+1])

/* Largest value a two-byte link can address, plus one. */
#define MAX_PATTERN_SIZE (1 << 16)

#define PUTINC(a,n,d) (PUT(a,n,d), (a) += LINK_SIZE)
00091
00092
00093
00094
00095
00096
/*
 * Accessors for 2-byte values (always two bytes, independent of LINK_SIZE),
 * stored most significant byte first at a[n] and a[n+1].
 *
 * PUT2 is a single comma expression rather than two statements, so it is
 * safe as the body of an unbraced if/else/loop and composes correctly
 * inside PUT2INC.  Arguments are evaluated more than once; do not pass
 * expressions with side effects.
 */
#define PUT2(a,n,d) \
  (((a)[(n)] = (d) >> 8), \
   ((a)[(n)+1] = (d) & 255))

#define GET2(a,n) \
  (((a)[(n)] << 8) | (a)[(n)+1])

/* Store a 2-byte value, then advance the pointer a past it. */
#define PUT2INC(a,n,d) (PUT2(a,n,d), (a) += 2)
00105
00106
00107
/* Option-bit and flag macros used by the compile and study code. */
00108 #define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL) /* the caseless, multiline and dotall bits combined */
00109
00110
00111
00112
00113
00114 /* Extra option bits defined here; none of them is listed in PUBLIC_OPTIONS. */
00115 /* pcre_study() returns NULL without studying when PCRE_FIRSTSET or PCRE_STARTLINE (or PCRE_ANCHORED) is set. */
00116 #define PCRE_FIRSTSET 0x40000000
00117 #define PCRE_REQCHSET 0x20000000
00118 #define PCRE_STARTLINE 0x10000000
00119 #define PCRE_ICHANGED 0x08000000
00120
00121
00122 /* Stored in pcre_study_data.options by pcre_study() once start_bits has been filled in. */
00123 #define PCRE_STUDY_MAPPED 0x01
00124
00125
00126
00127 /* Masks of the option bits accepted from callers. */
00128 #define PUBLIC_OPTIONS \
00129 (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
00130 PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
00131 PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK)
00132
00133 #define PUBLIC_EXEC_OPTIONS \
00134 (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK)
00135
00136 #define PUBLIC_STUDY_OPTIONS 0 /* zero: pcre_study() rejects every option bit */
00137
00138
00139 /* The bytes 0x50 0x43 0x52 0x45 are ASCII "PCRE"; pcre_study() checks real_pcre.magic_number against this. */
00140 #define MAGIC_NUMBER 0x50435245UL
00141
00142
00143 /* NOTE(review): presumably sentinel values for the first/required byte tracking in the compiler; not used in the visible code. */
00144 #define REQ_UNSET (-2)
00145 #define REQ_NONE (-1)
00146
00147
00148
00149 /* NOTE(review): presumably flag bits stored above the 8-bit byte value in first_byte/req_byte; confirm against the compiler. */
00150 #define REQ_CASELESS 0x0100
00151 #define REQ_VARY 0x0200
00152
00153
00154
00155
00156
00157
00158
/* Character values produced by the escapes listed as ESC_e, ESC_f, ESC_n, ESC_r and ESC_tee in escapes[]. */
/* Each is defined only when not already defined, so an earlier definition takes precedence. */
00159 #ifndef ESC_e
00160 #define ESC_e 27
00161 #endif
00162
00163 #ifndef ESC_f
00164 #define ESC_f '\f'
00165 #endif
00166
00167 #ifndef ESC_n
00168 #define ESC_n NEWLINE
00169 #endif
00170
00171 #ifndef ESC_r
00172 #define ESC_r '\r'
00173 #endif
00174
00175
00176
00177 /* Named ESC_tee rather than ESC_t. NOTE(review): presumably to avoid a clash with another identifier; the reason is not visible here. */
00178 #ifndef ESC_tee
00179 #define ESC_tee '\t'
00180 #endif
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
/* Codes for escapes that are not plain data characters.  They start at 1 and
   are stored negated in escapes[] (e.g. -ESC_A), which distinguishes them from
   the positive literal character values in that table.  check_escape() encodes
   a back reference to group n as -(ESC_REF + n), so ESC_REF must stay last. */
00193 enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
00194 ESC_w, ESC_dum1, ESC_C, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_REF };
00195
00196
00197
00198
/* NOTE(review): presumably flag bits and item codes for the data that follows an OP_XCLASS opcode; not used in the visible code, so confirm against the compiler/matcher. */
00199 #define XCL_NOT 0x01
00200 #define XCL_MAP 0x02
00201
00202 #define XCL_END 0
00203 #define XCL_SINGLE 1
00204 #define XCL_RANGE 2
00205
00206
00207
00208
00209
00210
00211
00212
/* Opcodes of the compiled pattern.  The order matters: OP_NAME_LIST and
   OP_LENGTHS have one entry per opcode in this same order, and
   set_start_bits() treats every value >= OP_BRA as a bracket, so OP_BRA must
   remain the last enumerator. */
00213 enum {
00214 OP_END,
00215
00216
00217 /* Single-item opcodes: anchors, character types and "any". */
00218 OP_SOD,
00219 OP_SOM,
00220 OP_NOT_WORD_BOUNDARY,
00221 OP_WORD_BOUNDARY,
00222 OP_NOT_DIGIT,
00223 OP_DIGIT,
00224 OP_NOT_WHITESPACE,
00225 OP_WHITESPACE,
00226 OP_NOT_WORDCHAR,
00227 OP_WORDCHAR,
00228 OP_ANY,
00229 OP_ANYBYTE,
00230 OP_EODN,
00231 OP_EOD,
00232
00233 OP_OPT,
00234 OP_CIRC,
00235 OP_DOLL,
00236 OP_CHARS,
00237 OP_NOT,
00238 /* Repeats of a single character. */
00239 OP_STAR,
00240 OP_MINSTAR,
00241 OP_PLUS,
00242 OP_MINPLUS,
00243 OP_QUERY,
00244 OP_MINQUERY,
00245 OP_UPTO,
00246 OP_MINUPTO,
00247 OP_EXACT,
00248 /* The same nine repeats, OP_NOT* forms. */
00249 OP_NOTSTAR,
00250 OP_NOTMINSTAR,
00251 OP_NOTPLUS,
00252 OP_NOTMINPLUS,
00253 OP_NOTQUERY,
00254 OP_NOTMINQUERY,
00255 OP_NOTUPTO,
00256 OP_NOTMINUPTO,
00257 OP_NOTEXACT,
00258 /* The same nine repeats applied to a character type (the type opcode follows the repeat data). */
00259 OP_TYPESTAR,
00260 OP_TYPEMINSTAR,
00261 OP_TYPEPLUS,
00262 OP_TYPEMINPLUS,
00263 OP_TYPEQUERY,
00264 OP_TYPEMINQUERY,
00265 OP_TYPEUPTO,
00266 OP_TYPEMINUPTO,
00267 OP_TYPEEXACT,
00268 /* Repeats that may follow a character class; set_start_bits() looks for these after the 32-byte class map. */
00269 OP_CRSTAR,
00270 OP_CRMINSTAR,
00271 OP_CRPLUS,
00272 OP_CRMINPLUS,
00273 OP_CRQUERY,
00274 OP_CRMINQUERY,
00275 OP_CRRANGE,
00276 OP_CRMINRANGE,
00277
00278 OP_CLASS,
00279 OP_NCLASS,
00280
00281
00282
00283 OP_XCLASS,
00284
00285
00286 OP_REF,
00287 OP_RECURSE,
00288 OP_CALLOUT,
00289
00290 OP_ALT,
00291 OP_KET,
00292 OP_KETRMAX,
00293 OP_KETRMIN,
00294
00295
00296 /* Assertions. */
00297 OP_ASSERT,
00298 OP_ASSERT_NOT,
00299 OP_ASSERTBACK,
00300 OP_ASSERTBACK_NOT,
00301 OP_REVERSE,
00302
00303
00304
00305
00306 OP_ONCE,
00307 OP_COND,
00308 OP_CREF,
00309
00310 OP_BRAZERO,
00311 OP_BRAMINZERO,
00312
00313 OP_BRANUMBER,
00314
00315 /* Must be last: any opcode value >= OP_BRA is handled as a bracket by set_start_bits(). */
00316 OP_BRA
00317
00318
00319 };
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
/* Printable name for each opcode, one string per enumerator in the same order as the opcode enum (OP_END first, OP_BRA last). */
/* NOTE(review): not used in the visible code; presumably for debug/print output. */
00330 #define OP_NAME_LIST \
00331 "End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \
00332 "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", "\\Z", "\\z", \
00333 "Opt", "^", "$", "chars", "not", \
00334 "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
00335 "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
00336 "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
00337 "*", "*?", "+", "+?", "?", "??", "{", "{", \
00338 "class", "nclass", "xclass", "Ref", "Recurse", "Callout", \
00339 "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \
00340 "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cond ref",\
00341 "Brazero", "Braminzero", "Branumber", "Bra"
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
/* Length in bytes of each opcode including its fixed data, one entry per enumerator in opcode-enum order. */
/* It initialises the OP_lengths[] array.  The per-line labels below are derived from that ordering. */
00353 #define OP_LENGTHS \
00354 1, /* OP_END */ \
00355 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* OP_SOD .. OP_WORDCHAR */ \
00356 1, 1, 1, 1, 2, 1, 1, /* OP_ANY, OP_ANYBYTE, OP_EODN, OP_EOD, OP_OPT, OP_CIRC, OP_DOLL */ \
00357 2, /* OP_CHARS */ \
00358 2, /* OP_NOT */ \
00359 \
00360 2, 2, 2, 2, 2, 2, /* OP_STAR .. OP_MINQUERY */ \
00361 4, 4, 4, /* OP_UPTO, OP_MINUPTO, OP_EXACT */ \
00362 \
00363 2, 2, 2, 2, 2, 2, /* OP_NOTSTAR .. OP_NOTMINQUERY */ \
00364 4, 4, 4, /* OP_NOTUPTO, OP_NOTMINUPTO, OP_NOTEXACT */ \
00365 \
00366 2, 2, 2, 2, 2, 2, /* OP_TYPESTAR .. OP_TYPEMINQUERY */ \
00367 4, 4, 4, /* OP_TYPEUPTO, OP_TYPEMINUPTO, OP_TYPEEXACT */ \
00368 \
00369 1, 1, 1, 1, 1, 1, /* OP_CRSTAR .. OP_CRMINQUERY */ \
00370 5, 5, /* OP_CRRANGE, OP_CRMINRANGE */ \
00371 33, /* OP_CLASS: opcode plus 32-byte bitmap */ \
00372 33, /* OP_NCLASS */ \
00373 0, /* OP_XCLASS; NOTE(review): 0 presumably marks a variable length */ \
00374 3, /* OP_REF */ \
00375 1+LINK_SIZE, /* OP_RECURSE */ \
00376 2, /* OP_CALLOUT */ \
00377 1+LINK_SIZE, /* OP_ALT */ \
00378 1+LINK_SIZE, /* OP_KET */ \
00379 1+LINK_SIZE, /* OP_KETRMAX */ \
00380 1+LINK_SIZE, /* OP_KETRMIN */ \
00381 1+LINK_SIZE, /* OP_ASSERT */ \
00382 1+LINK_SIZE, /* OP_ASSERT_NOT */ \
00383 1+LINK_SIZE, /* OP_ASSERTBACK */ \
00384 1+LINK_SIZE, /* OP_ASSERTBACK_NOT */ \
00385 1+LINK_SIZE, /* OP_REVERSE */ \
00386 1+LINK_SIZE, /* OP_ONCE */ \
00387 1+LINK_SIZE, /* OP_COND */ \
00388 3, /* OP_CREF */ \
00389 1, 1, /* OP_BRAZERO, OP_BRAMINZERO */ \
00390 3, /* OP_BRANUMBER */ \
00391 1+LINK_SIZE /* OP_BRA */ \
00392
00393
00394
00395
00396
00397
00398
00399
/* NOTE(review): neither constant is used in the visible code; meanings below are inferred from the names and should be confirmed. */
00400 #define EXTRACT_BASIC_MAX 150 /* presumably the highest group number encoded directly in a bracket opcode */
00401
00402
00403
00404 #define CREF_RECURSE 0xffff /* presumably a reserved OP_CREF operand meaning "inside a recursion" */
00405
00406
00407
00408
00409
00410
/* Text of the error messages reported through an errorptr argument, numbered ERR1..ERR44. */
/* check_escape() uses ERR1, ERR2 and ERR37 in the visible code. */
00411 #define ERR1 "\\ at end of pattern"
00412 #define ERR2 "\\c at end of pattern"
00413 #define ERR3 "unrecognized character follows \\"
00414 #define ERR4 "numbers out of order in {} quantifier"
00415 #define ERR5 "number too big in {} quantifier"
00416 #define ERR6 "missing terminating ] for character class"
00417 #define ERR7 "invalid escape sequence in character class"
00418 #define ERR8 "range out of order in character class"
00419 #define ERR9 "nothing to repeat"
00420 #define ERR10 "operand of unlimited repeat could match the empty string"
00421 #define ERR11 "internal error: unexpected repeat"
00422 #define ERR12 "unrecognized character after (?"
00423 #define ERR13 "POSIX named classes are supported only within a class"
00424 #define ERR14 "missing )"
00425 #define ERR15 "reference to non-existent subpattern"
00426 #define ERR16 "erroffset passed as NULL"
00427 #define ERR17 "unknown option bit(s) set"
00428 #define ERR18 "missing ) after comment"
00429 #define ERR19 "parentheses nested too deeply"
00430 #define ERR20 "regular expression too large"
00431 #define ERR21 "failed to get memory"
00432 #define ERR22 "unmatched parentheses"
00433 #define ERR23 "internal error: code overflow"
00434 #define ERR24 "unrecognized character after (?<"
00435 #define ERR25 "lookbehind assertion is not fixed length"
00436 #define ERR26 "malformed number after (?("
00437 #define ERR27 "conditional group contains more than two branches"
00438 #define ERR28 "assertion expected after (?("
00439 #define ERR29 "(?R or (?digits must be followed by )"
00440 #define ERR30 "unknown POSIX class name"
00441 #define ERR31 "POSIX collating elements are not supported"
00442 #define ERR32 "this version of PCRE is not compiled with PCRE_UTF8 support"
00443 #define ERR33 "spare error"
00444 #define ERR34 "character value in \\x{...} sequence is too large"
00445 #define ERR35 "invalid condition (?(0)"
00446 #define ERR36 "\\C not allowed in lookbehind assertion"
00447 #define ERR37 "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X"
00448 #define ERR38 "number after (?C is > 255"
00449 #define ERR39 "closing ) for (?C expected"
00450 #define ERR40 "recursive call could loop indefinitely"
00451 #define ERR41 "unrecognized character after (?P"
00452 #define ERR42 "syntax error after (?P"
00453 #define ERR43 "two named groups have the same name"
00454 #define ERR44 "invalid UTF-8 string"
00455
00456
00457
00458
00459
00460
00461
00462
/* Shorthand for unsigned char, used for pattern code, subject bytes and bitmaps throughout this file. */
00463 typedef unsigned char uschar;
00464
00465
00466
00467
/* Header of a compiled pattern; pcre_study() casts the caller's opaque pcre pointer to this type. */
/* pcre_study() locates the compiled code at sizeof(real_pcre) + name_count * name_entry_size bytes past the start of this header. */
00468 typedef struct real_pcre {
00469 unsigned long int magic_number; /* compared with MAGIC_NUMBER to validate the block */
00470 size_t size; /* NOTE(review): presumably the total size of the compiled block; not read in the visible code */
00471 const unsigned char *tables; /* character tables; pcre_study() reads the lcc/fcc/cbits/ctypes sub-tables from it */
00472 unsigned long int options; /* option bits; tested against PCRE_CASELESS, PCRE_UTF8, PCRE_ANCHORED, PCRE_FIRSTSET and PCRE_STARTLINE in pcre_study() */
00473 unsigned short int top_bracket; /* NOTE(review): presumably the highest capturing group number */
00474 unsigned short int top_backref; /* NOTE(review): presumably the highest back reference used */
00475 unsigned short int first_byte; /* NOTE(review): presumably the known first byte, meaningful with PCRE_FIRSTSET */
00476 unsigned short int req_byte; /* NOTE(review): presumably a byte that must appear, meaningful with PCRE_REQCHSET */
00477 unsigned short int name_entry_size; /* size of one name-table entry in bytes, as used in the offset computation in pcre_study() */
00478 unsigned short int name_count; /* number of name-table entries, as used in the same computation */
00479 } real_pcre;
00480
00481
00482
/* Result of studying a pattern; pcre_study() allocates it directly after a pcre_extra in the same malloc block. */
00483 typedef struct pcre_study_data {
00484 size_t size; /* set to sizeof(pcre_study_data) by pcre_study() */
00485 uschar options; /* set to PCRE_STUDY_MAPPED by pcre_study() */
00486 uschar start_bits[32]; /* 256-bit map, one bit per byte value, copied from the map built by set_start_bits() */
00487 } pcre_study_data;
00488
00489
00490
00491
/* Shared state passed to the compile and study helpers. */
/* pcre_study() fills in only the four table pointers before calling set_start_bits(). */
00492 typedef struct compile_data {
00493 const uschar *lcc; /* tables + lcc_offset: lower-casing table */
00494 const uschar *fcc; /* tables + fcc_offset: case-flipping table, read by set_bit() */
00495 const uschar *cbits; /* tables + cbits_offset: class bitmaps, read by set_start_bits() */
00496 const uschar *ctypes; /* tables + ctypes_offset: per-character type flags, read by set_bit() */
00497 const uschar *start_code; /* NOTE(review): presumably the start of the compiled code; unused in the visible code */
00498 uschar *name_table; /* NOTE(review): the remaining fields are presumably compile-time bookkeeping; none is used in the visible code */
00499 int names_found;
00500 int name_entry_size;
00501 int top_backref;
00502 unsigned int backref_map;
00503 int req_varyopt;
00504 } compile_data;
00505
00506
00507
00508
/* Node of a singly linked chain (via outer) that records a code pointer per nesting level. */
/* It is passed to compile_regex(), per its prototype.  NOTE(review): exact use is outside the visible code. */
00509 typedef struct branch_chain {
00510 struct branch_chain *outer;
00511 uschar *current;
00512 } branch_chain;
00513
00514
00515
00516
/* Record linked through prevrec and referenced from match_data.recursive. */
/* NOTE(review): presumably one record per active pattern recursion; the fields are not used in the visible code, so their meanings are inferred from the names only. */
00517 typedef struct recursion_info {
00518 struct recursion_info *prevrec;
00519 int group_num;
00520 const uschar *after_call;
00521 const uschar *save_start;
00522 int *offset_save;
00523 int saved_max;
00524 } recursion_info;
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
/* State block for the matcher. */
/* NOTE(review): nothing in the visible part of the file reads or writes this structure, so the field notes below are inferred from names and should be confirmed against the match code. */
00537 typedef struct match_data {
00538 unsigned long int match_call_count; /* presumably counted against match_limit */
00539 unsigned long int match_limit;
00540 int *offset_vector; /* presumably the caller's ovector */
00541 int offset_end;
00542 int offset_max;
00543 const uschar *lcc; /* same member names as the table pointers in compile_data */
00544 const uschar *ctypes;
00545 bool offset_overflow;
00546 bool notbol; /* the bool flags presumably mirror the PCRE_NOTBOL/NOTEOL/UTF8/DOLLAR_ENDONLY/NOTEMPTY options */
00547 bool noteol;
00548 bool utf8;
00549 bool endonly;
00550 bool notempty;
00551 const uschar *start_code;
00552 const uschar *start_subject;
00553 const uschar *end_subject;
00554 const uschar *start_match;
00555 const uschar *end_match_ptr;
00556 int end_offset_top;
00557 int capture_last;
00558 int start_offset;
00559 recursion_info *recursive; /* head of the recursion_info chain */
00560 void *callout_data;
00561 } match_data;
00562
00563
00564
/* Bit flags stored in the ctypes table, one byte per character; pcre_maketables() builds them. */
00565 #define ctype_space 0x01
00566 #define ctype_letter 0x02
00567 #define ctype_digit 0x04
00568 #define ctype_xdigit 0x08
00569 #define ctype_word 0x10
00570 #define ctype_meta 0x80
00571
00572
00573 /* Byte offsets of the ten 32-byte class bitmaps inside the cbits table; cbit_length is their total size. */
00574 /* Bit (c & 7) of byte (offset + c/8) is set when character c belongs to the class. */
00575 #define cbit_space 0
00576 #define cbit_xdigit 32
00577 #define cbit_digit 64
00578 #define cbit_upper 96
00579 #define cbit_lower 128
00580 #define cbit_word 160
00581 #define cbit_graph 192
00582 #define cbit_print 224
00583 #define cbit_punct 256
00584 #define cbit_cntrl 288
00585 #define cbit_length 320
00586
00587
00588 /* Byte offsets of the four sub-tables inside one character-tables block, and the block's total length. */
00589 /* pcre_maketables() allocates tables_length bytes and pcre_study() applies these offsets to re->tables. */
00590 #define lcc_offset 0
00591 #define fcc_offset 256
00592 #define cbits_offset 512
00593 #define ctypes_offset (cbits_offset + cbit_length)
00594 #define tables_length (ctypes_offset + 256)
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
/* Built-in character tables, laid out as lcc (256 bytes), fcc (256 bytes), cbits (cbit_length bytes) and ctypes (256 bytes). */
/* This is the same layout that pcre_maketables() produces. */
00610 static unsigned char pcre_default_tables[] = {
00611
00612
00613 /* Lower-casing table: entries 65..90 ('A'..'Z') map to 97..122, every other byte maps to itself. */
00614 0, 1, 2, 3, 4, 5, 6, 7,
00615 8, 9, 10, 11, 12, 13, 14, 15,
00616 16, 17, 18, 19, 20, 21, 22, 23,
00617 24, 25, 26, 27, 28, 29, 30, 31,
00618 32, 33, 34, 35, 36, 37, 38, 39,
00619 40, 41, 42, 43, 44, 45, 46, 47,
00620 48, 49, 50, 51, 52, 53, 54, 55,
00621 56, 57, 58, 59, 60, 61, 62, 63,
00622 64, 97, 98, 99,100,101,102,103,
00623 104,105,106,107,108,109,110,111,
00624 112,113,114,115,116,117,118,119,
00625 120,121,122, 91, 92, 93, 94, 95,
00626 96, 97, 98, 99,100,101,102,103,
00627 104,105,106,107,108,109,110,111,
00628 112,113,114,115,116,117,118,119,
00629 120,121,122,123,124,125,126,127,
00630 128,129,130,131,132,133,134,135,
00631 136,137,138,139,140,141,142,143,
00632 144,145,146,147,148,149,150,151,
00633 152,153,154,155,156,157,158,159,
00634 160,161,162,163,164,165,166,167,
00635 168,169,170,171,172,173,174,175,
00636 176,177,178,179,180,181,182,183,
00637 184,185,186,187,188,189,190,191,
00638 192,193,194,195,196,197,198,199,
00639 200,201,202,203,204,205,206,207,
00640 208,209,210,211,212,213,214,215,
00641 216,217,218,219,220,221,222,223,
00642 224,225,226,227,228,229,230,231,
00643 232,233,234,235,236,237,238,239,
00644 240,241,242,243,244,245,246,247,
00645 248,249,250,251,252,253,254,255,
00646
00647
00648 /* Case-flipping table: 'A'..'Z' and 'a'..'z' are swapped, every other byte maps to itself. */
00649 0, 1, 2, 3, 4, 5, 6, 7,
00650 8, 9, 10, 11, 12, 13, 14, 15,
00651 16, 17, 18, 19, 20, 21, 22, 23,
00652 24, 25, 26, 27, 28, 29, 30, 31,
00653 32, 33, 34, 35, 36, 37, 38, 39,
00654 40, 41, 42, 43, 44, 45, 46, 47,
00655 48, 49, 50, 51, 52, 53, 54, 55,
00656 56, 57, 58, 59, 60, 61, 62, 63,
00657 64, 97, 98, 99,100,101,102,103,
00658 104,105,106,107,108,109,110,111,
00659 112,113,114,115,116,117,118,119,
00660 120,121,122, 91, 92, 93, 94, 95,
00661 96, 65, 66, 67, 68, 69, 70, 71,
00662 72, 73, 74, 75, 76, 77, 78, 79,
00663 80, 81, 82, 83, 84, 85, 86, 87,
00664 88, 89, 90,123,124,125,126,127,
00665 128,129,130,131,132,133,134,135,
00666 136,137,138,139,140,141,142,143,
00667 144,145,146,147,148,149,150,151,
00668 152,153,154,155,156,157,158,159,
00669 160,161,162,163,164,165,166,167,
00670 168,169,170,171,172,173,174,175,
00671 176,177,178,179,180,181,182,183,
00672 184,185,186,187,188,189,190,191,
00673 192,193,194,195,196,197,198,199,
00674 200,201,202,203,204,205,206,207,
00675 208,209,210,211,212,213,214,215,
00676 216,217,218,219,220,221,222,223,
00677 224,225,226,227,228,229,230,231,
00678 232,233,234,235,236,237,238,239,
00679 240,241,242,243,244,245,246,247,
00680 248,249,250,251,252,253,254,255,
00681
00682
00683
00684
00685 /* Class bitmaps: ten groups of 32 bytes, in the order of the cbit_* offsets */
00686 /* (space, xdigit, digit, upper, lower, word, graph, print, punct, cntrl). */
00687
00688 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
00689 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00690 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00691 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00692
00693 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
00694 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
00695 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00696 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00697
00698 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
00699 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00700 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00701 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00702
00703 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00704 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
00705 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00706 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00707
00708 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00709 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
00710 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00711 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00712
00713 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
00714 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
00715 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00716 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00717
00718 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
00719 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
00720 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00721 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00722
00723 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
00724 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
00725 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00726 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00727
00728 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
00729 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
00730 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00731 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00732
00733 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
00734 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
00735 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00736 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00737
00738
00739
00740
00741
00742
00743
00744 /* Character-type flags: one byte of ctype_* bits per character */
00745 /* (0x01 space, 0x02 letter, 0x04 digit, 0x08 hex digit, 0x10 word character, 0x80 pattern metacharacter). */
00746
00747 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00748 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00,
00749 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00750 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00751 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00,
00752 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00,
00753 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,
00754 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80,
00755 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12,
00756 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00757 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00758 0x12,0x12,0x12,0x80,0x00,0x00,0x80,0x10,
00759 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12,
00760 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00761 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00762 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00,
00763 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00764 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00765 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00766 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00767 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00768 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00769 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00770 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00771 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00772 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00773 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00774 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00775 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00776 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00777 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00778 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800
00801
00802
00803
00804
00805
00806
00807
00808
00809 int
00810 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
00811 int stringnumber, char *buffer, int size)
00812 {
00813 int yield;
00814 if (stringnumber < 0 || stringnumber >= stringcount)
00815 return PCRE_ERROR_NOSUBSTRING;
00816 stringnumber *= 2;
00817 yield = ovector[stringnumber+1] - ovector[stringnumber];
00818 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
00819 memcpy(buffer, subject + ovector[stringnumber], yield);
00820 buffer[yield] = 0;
00821 return yield;
00822 }
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840 const unsigned char *
00841 pcre_maketables(void)
00842 {
00843 unsigned char *yield, *p;
00844 int i;
00845
00846 yield = static_cast<unsigned char*>(malloc(tables_length));
00847
00848 if (yield == NULL) return NULL;
00849 p = yield;
00850
00851
00852
00853 for (i = 0; i < 256; i++) *p++ = tolower(i);
00854
00855
00856
00857 for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
00858
00859
00860
00861
00862
00863
00864 memset(p, 0, cbit_length);
00865 for (i = 0; i < 256; i++)
00866 {
00867 if (isdigit(i))
00868 {
00869 p[cbit_digit + i/8] |= 1 << (i&7);
00870 p[cbit_word + i/8] |= 1 << (i&7);
00871 }
00872 if (isupper(i))
00873 {
00874 p[cbit_upper + i/8] |= 1 << (i&7);
00875 p[cbit_word + i/8] |= 1 << (i&7);
00876 }
00877 if (islower(i))
00878 {
00879 p[cbit_lower + i/8] |= 1 << (i&7);
00880 p[cbit_word + i/8] |= 1 << (i&7);
00881 }
00882 if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
00883 if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
00884 if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
00885 if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
00886 if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
00887 if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
00888 if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
00889 }
00890 p += cbit_length;
00891
00892
00893
00894
00895
00896 for (i = 0; i < 256; i++)
00897 {
00898 int x = 0;
00899 if (i != 0x0b && isspace(i)) x += ctype_space;
00900 if (isalpha(i)) x += ctype_letter;
00901 if (isdigit(i)) x += ctype_digit;
00902 if (isxdigit(i)) x += ctype_xdigit;
00903 if (isalnum(i) || i == '_') x += ctype_word;
00904 if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta;
00905 *p++ = x;
00906 }
00907
00908 return yield;
00909 }
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930 static void
00931 set_bit(uschar *start_bits, int c, bool caseless, compile_data *cd)
00932 {
00933 start_bits[c/8] |= (1 << (c&7));
00934 if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
00935 start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
00936 }
00937
00938
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958 static bool
00959 set_start_bits(const uschar *code, uschar *start_bits, bool caseless,
00960 bool utf8, compile_data *cd)
00961 {
00962 register int c;
00963
00964
00965
00966
00967
00968
00969
00970 volatile int dummy;
00971
00972 do
00973 {
00974 const uschar *tcode = code + 1 + LINK_SIZE;
00975 bool try_next = true;
00976
00977 while (try_next)
00978 {
00979
00980
00981
00982 if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
00983 {
00984 if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
00985 return false;
00986 try_next = false;
00987 }
00988
00989 else switch(*tcode)
00990 {
00991 default:
00992 return false;
00993
00994
00995
00996 case OP_CALLOUT:
00997 tcode += 2;
00998 break;
00999
01000
01001
01002 case OP_BRANUMBER:
01003 tcode += 3;
01004 break;
01005
01006
01007
01008 case OP_ASSERT_NOT:
01009 case OP_ASSERTBACK:
01010 case OP_ASSERTBACK_NOT:
01011 do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
01012 tcode += 1+LINK_SIZE;
01013 break;
01014
01015
01016
01017 case OP_OPT:
01018 caseless = (tcode[1] & PCRE_CASELESS) != 0;
01019 tcode += 2;
01020 break;
01021
01022
01023
01024 case OP_BRAZERO:
01025 case OP_BRAMINZERO:
01026 if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
01027 return false;
01028 dummy = 1;
01029 do tcode += GET(tcode,1); while (*tcode == OP_ALT);
01030 tcode += 1+LINK_SIZE;
01031 break;
01032
01033
01034
01035 case OP_STAR:
01036 case OP_MINSTAR:
01037 case OP_QUERY:
01038 case OP_MINQUERY:
01039 set_bit(start_bits, tcode[1], caseless, cd);
01040 tcode += 2;
01041 break;
01042
01043
01044
01045 case OP_UPTO:
01046 case OP_MINUPTO:
01047 set_bit(start_bits, tcode[3], caseless, cd);
01048 tcode += 4;
01049 break;
01050
01051
01052
01053 case OP_EXACT:
01054 tcode++;
01055
01056 case OP_CHARS:
01057 tcode++;
01058
01059 case OP_PLUS:
01060 case OP_MINPLUS:
01061 set_bit(start_bits, tcode[1], caseless, cd);
01062 try_next = false;
01063 break;
01064
01065
01066
01067 case OP_NOT_DIGIT:
01068 for (c = 0; c < 32; c++)
01069 start_bits[c] |= ~cd->cbits[c+cbit_digit];
01070 try_next = false;
01071 break;
01072
01073 case OP_DIGIT:
01074 for (c = 0; c < 32; c++)
01075 start_bits[c] |= cd->cbits[c+cbit_digit];
01076 try_next = false;
01077 break;
01078
01079 case OP_NOT_WHITESPACE:
01080 for (c = 0; c < 32; c++)
01081 start_bits[c] |= ~cd->cbits[c+cbit_space];
01082 try_next = false;
01083 break;
01084
01085 case OP_WHITESPACE:
01086 for (c = 0; c < 32; c++)
01087 start_bits[c] |= cd->cbits[c+cbit_space];
01088 try_next = false;
01089 break;
01090
01091 case OP_NOT_WORDCHAR:
01092 for (c = 0; c < 32; c++)
01093 start_bits[c] |= ~cd->cbits[c+cbit_word];
01094 try_next = false;
01095 break;
01096
01097 case OP_WORDCHAR:
01098 for (c = 0; c < 32; c++)
01099 start_bits[c] |= cd->cbits[c+cbit_word];
01100 try_next = false;
01101 break;
01102
01103
01104
01105
01106 case OP_TYPEPLUS:
01107 case OP_TYPEMINPLUS:
01108 tcode++;
01109 break;
01110
01111 case OP_TYPEEXACT:
01112 tcode += 3;
01113 break;
01114
01115
01116
01117
01118 case OP_TYPEUPTO:
01119 case OP_TYPEMINUPTO:
01120 tcode += 2;
01121
01122 case OP_TYPESTAR:
01123 case OP_TYPEMINSTAR:
01124 case OP_TYPEQUERY:
01125 case OP_TYPEMINQUERY:
01126 switch(tcode[1])
01127 {
01128 case OP_NOT_DIGIT:
01129 for (c = 0; c < 32; c++)
01130 start_bits[c] |= ~cd->cbits[c+cbit_digit];
01131 break;
01132
01133 case OP_DIGIT:
01134 for (c = 0; c < 32; c++)
01135 start_bits[c] |= cd->cbits[c+cbit_digit];
01136 break;
01137
01138 case OP_NOT_WHITESPACE:
01139 for (c = 0; c < 32; c++)
01140 start_bits[c] |= ~cd->cbits[c+cbit_space];
01141 break;
01142
01143 case OP_WHITESPACE:
01144 for (c = 0; c < 32; c++)
01145 start_bits[c] |= cd->cbits[c+cbit_space];
01146 break;
01147
01148 case OP_NOT_WORDCHAR:
01149 for (c = 0; c < 32; c++)
01150 start_bits[c] |= ~cd->cbits[c+cbit_word];
01151 break;
01152
01153 case OP_WORDCHAR:
01154 for (c = 0; c < 32; c++)
01155 start_bits[c] |= cd->cbits[c+cbit_word];
01156 break;
01157 }
01158
01159 tcode += 2;
01160 break;
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170 case OP_NCLASS:
01171 if (utf8) memset(start_bits+16, 0xff, 16);
01172
01173
01174 case OP_CLASS:
01175 {
01176 tcode++;
01177 for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
01178 tcode += 32;
01179 switch (*tcode)
01180 {
01181 case OP_CRSTAR:
01182 case OP_CRMINSTAR:
01183 case OP_CRQUERY:
01184 case OP_CRMINQUERY:
01185 tcode++;
01186 break;
01187
01188 case OP_CRRANGE:
01189 case OP_CRMINRANGE:
01190 if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
01191 else try_next = false;
01192 break;
01193
01194 default:
01195 try_next = false;
01196 break;
01197 }
01198 }
01199 break;
01200
01201 }
01202 }
01203
01204 code += GET(code, 1);
01205 }
01206 while (*code == OP_ALT);
01207 return true;
01208 }
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231 pcre_extra *
01232 pcre_study(const pcre *external_re, int options, const char **errorptr)
01233 {
01234 uschar start_bits[32];
01235 pcre_extra *extra;
01236 pcre_study_data *study;
01237 const real_pcre *re = (const real_pcre *)external_re;
01238 uschar *code = (uschar *)re + sizeof(real_pcre) +
01239 (re->name_count * re->name_entry_size);
01240 compile_data compile_block;
01241
01242 *errorptr = NULL;
01243
01244 if (re == NULL || re->magic_number != MAGIC_NUMBER)
01245 {
01246 *errorptr = "argument is not a compiled regular expression";
01247 return NULL;
01248 }
01249
01250 if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
01251 {
01252 *errorptr = "unknown or incorrect option bit(s) set";
01253 return NULL;
01254 }
01255
01256
01257
01258
01259
01260 if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
01261 return NULL;
01262
01263
01264
01265 compile_block.lcc = re->tables + lcc_offset;
01266 compile_block.fcc = re->tables + fcc_offset;
01267 compile_block.cbits = re->tables + cbits_offset;
01268 compile_block.ctypes = re->tables + ctypes_offset;
01269
01270
01271
01272 memset(start_bits, 0, 32 * sizeof(uschar));
01273 if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
01274 (re->options & PCRE_UTF8) != 0, &compile_block)) return NULL;
01275
01276
01277
01278
01279
01280
01281
01282
01283 extra = static_cast<pcre_extra *>(malloc(sizeof(pcre_extra) + sizeof(pcre_study_data)));
01284
01285 if (extra == NULL)
01286 {
01287 *errorptr = "failed to get memory";
01288 return NULL;
01289 }
01290
01291
01292 study = reinterpret_cast<pcre_study_data *>(reinterpret_cast<char*>(extra) + sizeof(pcre_extra));
01293 extra->flags = PCRE_EXTRA_STUDY_DATA;
01294 extra->study_data = study;
01295
01296 study->size = sizeof(pcre_study_data);
01297 study->options = PCRE_STUDY_MAPPED;
01298 memcpy(study->start_bits, start_bits, sizeof(start_bits));
01299
01300 return extra;
01301 }
01302
01303
01304
/* Debug-print hook: expands to nothing, so any DPRINTF(...) call disappears from the build. */
01305 #define DPRINTF(p)
01306
01307
01308
01309
01310
01311 /* NOTE(review): the four limits below are not used in the visible code; the descriptions are inferred from the names and should be confirmed at their use sites. */
01312
01313 #define BRASTACK_SIZE 200 /* presumably the capacity of the bracket stack used while compiling */
01314
01315
01316
01317
01318
01319
01320 #define REC_STACK_SAVE_MAX 30 /* presumably the number of offsets saved on the stack during recursion */
01321
01322
01323
01324
01325
01326
01327
01328 #define MAXLIT 250 /* presumably the maximum length of one literal run */
01329
01330
01331
01332
01333
01334 #define REQ_BYTE_MAX 1000 /* presumably a subject-length bound for the required-byte check */
01335
01336
01337
01338
01339
/* Length table built from OP_LENGTHS: one entry per opcode, in opcode-enum order. */
01340 static const uschar OP_lengths[] = { OP_LENGTHS };
01341
01342
01343
/* Six-entry tables of minimum and maximum repeat counts. */
/* NOTE(review): presumably indexed by (opcode - OP_STAR) for the *, *?, +, +?, ?, ?? forms, with a max of 0 meaning unlimited; the use site is outside the visible code. */
01344 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
01345 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
01346
01347
01348
01349
01350
01351
/* Lookup table for the character that follows a backslash, indexed by (c - '0') for c in '0'..'z' (see check_escape()). */
/* A positive entry is the literal character the escape stands for, a negative entry is a negated ESC_* code, and 0 means check_escape() must handle the character with further code. */
01352 static const short int escapes[] = {
01353 0, 0, 0, 0, 0, 0, 0, 0, /* '0' - '7' */
01354 0, 0, ':', ';', '<', '=', '>', '?', /* '8' - '?' */
01355 '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, /* '@' - 'G' */
01356 0, 0, 0, 0, 0, 0, 0, 0, /* 'H' - 'O' */
01357 0, -ESC_Q, 0, -ESC_S, 0, 0, 0, -ESC_W, /* 'P' - 'W' */
01358 0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* 'X' - '_' */
01359 '`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* '`' - 'g' */
01360 0, 0, 0, 0, 0, 0, ESC_n, 0, /* 'h' - 'o' */
01361 0, 0, ESC_r, -ESC_s, ESC_tee, 0, 0, -ESC_w, /* 'p' - 'w' */
01362 0, 0, -ESC_z /* 'x' - 'z' */
01363 };
01364
01365
01366
01367
01368
/* Names of the POSIX character classes, and a parallel table of each name's length. */
/* The lengths table has one extra trailing 0.  NOTE(review): presumably an end marker for the lookup loop; confirm at the use site. */
01369 static const char *const posix_names[] = {
01370 "alpha", "lower", "upper",
01371 "alnum", "ascii", "blank", "cntrl", "digit", "graph",
01372 "print", "punct", "space", "word", "xdigit" };
01373
01374 static const uschar posix_name_lengths[] = {
01375 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
01376
01377
01378
01379
01380
/* Three cbit_* offsets per POSIX class, with -1 filling unused slots. */
/* The fourteen rows appear in the same order as posix_names[].  NOTE(review): how the three entries are combined is decided at the use site, outside the visible code. */
01381 static const int posix_class_maps[] = {
01382 cbit_lower, cbit_upper, -1, /* alpha */
01383 cbit_lower, -1, -1, /* lower */
01384 cbit_upper, -1, -1, /* upper */
01385 cbit_digit, cbit_lower, cbit_upper, /* alnum */
01386 cbit_print, cbit_cntrl, -1, /* ascii */
01387 cbit_space, -1, -1, /* blank */
01388 cbit_cntrl, -1, -1, /* cntrl */
01389 cbit_digit, -1, -1, /* digit */
01390 cbit_graph, -1, -1, /* graph */
01391 cbit_print, -1, -1, /* print */
01392 cbit_punct, -1, -1, /* punct */
01393 cbit_space, -1, -1, /* space */
01394 cbit_word, -1, -1, /* word */
01395 cbit_xdigit,-1, -1 /* xdigit */
01396 };
01397
01398
01399
01400
01401
01402
01403
01404
01405
01406
01407
01408
01409
01410
01411
01412
01413
/* Fixed, locale-independent digit table indexed by byte value; check_escape() tests it with ctype_digit and ctype_xdigit. */
/* '0'..'9' hold 0x0c (ctype_digit | ctype_xdigit), 'A'..'F' and 'a'..'f' hold 0x08 (ctype_xdigit), everything else is 0. */
01414 static const unsigned char digitab[] =
01415 {
01416 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01417 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01418 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01419 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01420 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01421 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01422 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* '0' - '7' */
01423 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* '8' - '?' */
01424 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* '@' - 'G' */
01425 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01426 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01427 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01428 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* '`' - 'g' */
01429 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01430 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01431 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01432 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01433 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01434 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01435 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01436 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01437 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01438 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01439 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01440 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01441 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01442 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01443 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01444 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01445 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01446 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
01447 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
01448
01449
01450
/* Forward declaration of compile_regex(), whose definition is outside the visible part of the file. */
/* NOTE(review): presumably declared ahead of its definition because it is called recursively by the branch compiler; parameter meanings are not recoverable from this unnamed prototype. */
01451 static bool
01452 compile_regex(int, int, int *, uschar **, const uschar **, const char **,
01453 bool, int, int *, int *, branch_chain *, compile_data *);
01454
01455
01456
01457
01458
01459
/* Node of a singly linked list (via epb_prev) of saved subject pointers. */
/* NOTE(review): presumably used by the matcher to detect empty-string repeats of a group; the use site is outside the visible code. */
01460 typedef struct eptrblock {
01461 struct eptrblock *epb_prev;
01462 const uschar *epb_saved_eptr;
01463 } eptrblock;
01464
01465
01466
/* NOTE(review): presumably flag bits passed to, and result codes returned by, the internal match function; none is used in the visible code. */
01467 #define match_condassert 0x01
01468 #define match_isgroup 0x02
01469
01470
01471
01472
01473 #define MATCH_MATCH 1
01474 #define MATCH_NOMATCH 0
01475
01476
01477
01478
01479
01480
01481
01482
01483
01484
01485
01486
01487
/* Global pointer to a callout function taking a pcre_callout_block; initialised to NULL. */
/* NOTE(review): presumably invoked by the matcher when it reaches an OP_CALLOUT item; the call site is outside the visible code. */
01488 int (*pcre_callout)(pcre_callout_block *) = NULL;
01489
01490
01491
01492
01493
01494
/* Single-byte character fetch macros: each reads exactly one byte through eptr. */
/* Each expansion already ends in a semicolon.  GETCHARLEN ignores its len argument and BACKCHAR expands to nothing. */
/* NOTE(review): presumably the non-UTF-8 variants of macros that decode multi-byte characters in a UTF-8 build. */
01495 #define GETCHAR(c, eptr) c = *eptr;
01496 #define GETCHARINC(c, eptr) c = *eptr++;
01497 #define GETCHARINCTEST(c, eptr) c = *eptr++;
01498 #define GETCHARLEN(c, eptr, len) c = *eptr;
01499 #define BACKCHAR(eptr)
01500
01501
01502
01503
01504
01505
01506
01507
01508
01509
01510
01511
01512
01513
01514
01515
01516
01517
01518
01519
01520
01521
01522
01523 static int
01524 check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
01525 int options, bool isclass)
01526 {
01527 const uschar *ptr = *ptrptr;
01528 int c, i;
01529
01530
01531
01532 c = *(++ptr);
01533 if (c == 0) *errorptr = ERR1;
01534
01535
01536
01537
01538
01539 else if (c < '0' || c > 'z') {}
01540 else if ((i = escapes[c - '0']) != 0) c = i;
01541
01542
01543
01544 else
01545 {
01546 const uschar *oldptr;
01547 switch (c)
01548 {
01549
01550
01551
01552 case 'l':
01553 case 'L':
01554 case 'N':
01555 case 'p':
01556 case 'P':
01557 case 'u':
01558 case 'U':
01559 case 'X':
01560 *errorptr = ERR37;
01561 break;
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573
01574
01575 case '1': case '2': case '3': case '4': case '5':
01576 case '6': case '7': case '8': case '9':
01577
01578 if (!isclass)
01579 {
01580 oldptr = ptr;
01581 c -= '0';
01582 while ((digitab[ptr[1]] & ctype_digit) != 0)
01583 c = c * 10 + *(++ptr) - '0';
01584 if (c < 10 || c <= bracount)
01585 {
01586 c = -(ESC_REF + c);
01587 break;
01588 }
01589 ptr = oldptr;
01590 }
01591
01592
01593
01594
01595
01596 if ((c = *ptr) >= '8')
01597 {
01598 ptr--;
01599 c = 0;
01600 break;
01601 }
01602
01603
01604
01605
01606 case '0':
01607 c -= '0';
01608 while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')
01609 c = c * 8 + *(++ptr) - '0';
01610 c &= 255;
01611 break;
01612
01613
01614
01615
01616 case 'x':
01617
01618
01619
01620 c = 0;
01621 while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
01622 {
01623 int cc;
01624 cc = *(++ptr);
01625 if (cc >= 'a') cc -= 32;
01626 c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
01627 }
01628 break;
01629
01630
01631
01632 case 'c':
01633 c = *(++ptr);
01634 if (c == 0)
01635 {
01636 *errorptr = ERR2;
01637 return 0;
01638 }
01639
01640
01641
01642
01643 if (c >= 'a' && c <= 'z') c -= 32;
01644 c ^= 0x40;
01645 break;
01646
01647
01648
01649
01650
01651
01652
01653 default:
01654 if ((options & PCRE_EXTRA) != 0)
01655 {
01656 *errorptr = ERR3;
01657 }
01658 break;
01659 }
01660 }
01661
01662 *ptrptr = ptr;
01663 return c;
01664 }
01665
01666
01667
01668
01669
01670
01671
01672
01673
01674
01675
01676
01677
01678
01679
01680
01681
01682
01683 static bool
01684 is_counted_repeat(const uschar *p)
01685 {
01686 if ((digitab[*p++] & ctype_digit) == 0) return false;
01687 while ((digitab[*p] & ctype_digit) != 0) p++;
01688 if (*p == '}') return true;
01689
01690 if (*p++ != ',') return false;
01691 if (*p == '}') return true;
01692
01693 if ((digitab[*p++] & ctype_digit) == 0) return false;
01694 while ((digitab[*p] & ctype_digit) != 0) p++;
01695
01696 return (*p == '}');
01697 }
01698
01699
01700
01701
01702
01703
01704
01705
01706
01707
01708
01709
01710
01711
01712
01713
01714
01715
01716
01717
01718
01719
01720 static const uschar *
01721 read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
01722 {
01723 int min = 0;
01724 int max = -1;
01725
01726 while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
01727
01728 if (*p == '}') max = min; else
01729 {
01730 if (*(++p) != '}')
01731 {
01732 max = 0;
01733 while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
01734 if (max < min)
01735 {
01736 *errorptr = ERR4;
01737 return p;
01738 }
01739 }
01740 }
01741
01742
01743
01744
01745 if (min < 0 || 65535 < min ||
01746 max < -1 || 65535 < max)
01747 *errorptr = ERR5;
01748 else
01749 {
01750 *minp = min;
01751 *maxp = max;
01752 }
01753 return p;
01754 }
01755
01756
01757
01758
01759
01760
01761
01762
01763
01764
01765
01766
01767
01768
01769
01770
01771
01772
01773
01774
01775 static const uschar*
01776 first_significant_code(const uschar *code, int *options, int optbit)
01777 {
01778 for (;;)
01779 {
01780 switch ((int)*code)
01781 {
01782 case OP_OPT:
01783 if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
01784 *options = (int)code[1];
01785 code += 2;
01786 break;
01787
01788 case OP_ASSERT_NOT:
01789 case OP_ASSERTBACK:
01790 case OP_ASSERTBACK_NOT:
01791 do code += GET(code, 1); while (*code == OP_ALT);
01792
01793
01794 case OP_CALLOUT:
01795 case OP_CREF:
01796 case OP_BRANUMBER:
01797 case OP_WORD_BOUNDARY:
01798 case OP_NOT_WORD_BOUNDARY:
01799 code += OP_lengths[*code];
01800 break;
01801
01802 default:
01803 return code;
01804 }
01805 }
01806
01807 }
01808
01809
01810
01811
01812
01813
01814
01815
01816
01817
01818
01819
01820
01821
01822
01823
01824
01825
01826
01827
01828 static int
01829 find_fixedlength(uschar *code, int options)
01830 {
01831 int length = -1;
01832
01833 register int branchlength = 0;
01834 register uschar *cc = code + 1 + LINK_SIZE;
01835
01836
01837
01838
01839 for (;;)
01840 {
01841 int d;
01842 register int op = *cc;
01843 if (op >= OP_BRA) op = OP_BRA;
01844
01845 switch (op)
01846 {
01847 case OP_BRA:
01848 case OP_ONCE:
01849 case OP_COND:
01850 d = find_fixedlength(cc, options);
01851 if (d < 0) return d;
01852 branchlength += d;
01853 do cc += GET(cc, 1); while (*cc == OP_ALT);
01854 cc += 1 + LINK_SIZE;
01855 break;
01856
01857
01858
01859
01860
01861 case OP_ALT:
01862 case OP_KET:
01863 case OP_KETRMAX:
01864 case OP_KETRMIN:
01865 case OP_END:
01866 if (length < 0) length = branchlength;
01867 else if (length != branchlength) return -1;
01868 if (*cc != OP_ALT) return length;
01869 cc += 1 + LINK_SIZE;
01870 branchlength = 0;
01871 break;
01872
01873
01874
01875 case OP_ASSERT:
01876 case OP_ASSERT_NOT:
01877 case OP_ASSERTBACK:
01878 case OP_ASSERTBACK_NOT:
01879 do cc += GET(cc, 1); while (*cc == OP_ALT);
01880
01881
01882
01883
01884 case OP_REVERSE:
01885 case OP_BRANUMBER:
01886 case OP_CREF:
01887 case OP_OPT:
01888 case OP_CALLOUT:
01889 case OP_SOD:
01890 case OP_SOM:
01891 case OP_EOD:
01892 case OP_EODN:
01893 case OP_CIRC:
01894 case OP_DOLL:
01895 case OP_NOT_WORD_BOUNDARY:
01896 case OP_WORD_BOUNDARY:
01897 cc += OP_lengths[*cc];
01898 break;
01899
01900
01901
01902
01903
01904
01905 case OP_CHARS:
01906 branchlength += *(++cc);
01907 cc += *cc + 1;
01908 break;
01909
01910
01911
01912
01913 case OP_EXACT:
01914 branchlength += GET2(cc,1);
01915 cc += 4;
01916 break;
01917
01918 case OP_TYPEEXACT:
01919 branchlength += GET2(cc,1);
01920 cc += 4;
01921 break;
01922
01923
01924
01925 case OP_NOT_DIGIT:
01926 case OP_DIGIT:
01927 case OP_NOT_WHITESPACE:
01928 case OP_WHITESPACE:
01929 case OP_NOT_WORDCHAR:
01930 case OP_WORDCHAR:
01931 case OP_ANY:
01932 branchlength++;
01933 cc++;
01934 break;
01935
01936
01937
01938 case OP_ANYBYTE:
01939 return -2;
01940
01941
01942
01943
01944 case OP_CLASS:
01945 case OP_NCLASS:
01946 cc += 33;
01947
01948 switch (*cc)
01949 {
01950 case OP_CRSTAR:
01951 case OP_CRMINSTAR:
01952 case OP_CRQUERY:
01953 case OP_CRMINQUERY:
01954 return -1;
01955
01956 case OP_CRRANGE:
01957 case OP_CRMINRANGE:
01958 if (GET2(cc,1) != GET2(cc,3)) return -1;
01959 branchlength += GET2(cc,1);
01960 cc += 5;
01961 break;
01962
01963 default:
01964 branchlength++;
01965 }
01966 break;
01967
01968
01969
01970 default:
01971 return -1;
01972 }
01973 }
01974
01975 }
01976
01977
01978
01979
01980
01981
01982
01983
01984
01985
01986
01987
01988
01989
01990
01991
01992
01993
01994
01995 static const uschar *
01996 find_bracket(const uschar *code, int number)
01997 {
01998
01999 for (;;)
02000 {
02001 register int c = *code;
02002 if (c == OP_END) return NULL;
02003 else if (c == OP_CHARS) code += code[1] + OP_lengths[c];
02004 else if (c > OP_BRA)
02005 {
02006 int n = c - OP_BRA;
02007 if (n > EXTRACT_BASIC_MAX) n = GET2(code, 2+LINK_SIZE);
02008 if (n == number) return (uschar *)code;
02009 code += OP_lengths[OP_BRA];
02010 }
02011 else
02012 {
02013 code += OP_lengths[c];
02014
02015 }
02016 }
02017 }
02018
02019
02020
02021
02022
02023
02024
02025
02026
02027
02028
02029
02030
02031
02032
02033
02034
02035 static const uschar *
02036 find_recurse(const uschar *code, bool utf8)
02037 {
02038 utf8 = utf8;
02039
02040 for (;;)
02041 {
02042 register int c = *code;
02043 if (c == OP_END) return NULL;
02044 else if (c == OP_RECURSE) return code;
02045 else if (c == OP_CHARS) code += code[1] + OP_lengths[c];
02046 else if (c > OP_BRA)
02047 {
02048 code += OP_lengths[OP_BRA];
02049 }
02050 else
02051 {
02052 code += OP_lengths[c];
02053
02054 }
02055 }
02056 }
02057
02058
02059
02060
02061
02062
02063
02064
02065
02066
02067
02068
02069
02070
02071
02072
02073
02074
02075
02076
02077
02078 static bool
02079 could_be_empty_branch(const uschar *code, const uschar *endcode, bool utf8)
02080 {
02081 register int c;
02082 for (code = first_significant_code(code + 1 + LINK_SIZE, NULL, 0);
02083 code < endcode;
02084 code = first_significant_code(code + OP_lengths[c], NULL, 0))
02085 {
02086 const uschar *ccode;
02087
02088 c = *code;
02089
02090 if (c >= OP_BRA)
02091 {
02092 bool empty_branch;
02093 if (GET(code, 1) == 0) return true;
02094
02095
02096
02097 empty_branch = false;
02098 do
02099 {
02100 if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
02101 empty_branch = true;
02102 code += GET(code, 1);
02103 }
02104 while (*code == OP_ALT);
02105 if (!empty_branch) return false;
02106 code += 1 + LINK_SIZE;
02107 c = *code;
02108 }
02109
02110 else switch (c)
02111 {
02112
02113
02114
02115 case OP_CLASS:
02116 case OP_NCLASS:
02117 ccode = code + 33;
02118
02119
02120 switch (*ccode)
02121 {
02122 case OP_CRSTAR:
02123 case OP_CRMINSTAR:
02124 case OP_CRQUERY:
02125 case OP_CRMINQUERY:
02126 break;
02127
02128 default:
02129 case OP_CRPLUS:
02130 case OP_CRMINPLUS:
02131 return false;
02132
02133 case OP_CRRANGE:
02134 case OP_CRMINRANGE:
02135 if (GET2(ccode, 1) > 0) return false;
02136 break;
02137 }
02138 break;
02139
02140
02141
02142 case OP_NOT_DIGIT:
02143 case OP_DIGIT:
02144 case OP_NOT_WHITESPACE:
02145 case OP_WHITESPACE:
02146 case OP_NOT_WORDCHAR:
02147 case OP_WORDCHAR:
02148 case OP_ANY:
02149 case OP_ANYBYTE:
02150 case OP_CHARS:
02151 case OP_NOT:
02152 case OP_PLUS:
02153 case OP_MINPLUS:
02154 case OP_EXACT:
02155 case OP_NOTPLUS:
02156 case OP_NOTMINPLUS:
02157 case OP_NOTEXACT:
02158 case OP_TYPEPLUS:
02159 case OP_TYPEMINPLUS:
02160 case OP_TYPEEXACT:
02161 return false;
02162
02163
02164
02165 case OP_KET:
02166 case OP_KETRMAX:
02167 case OP_KETRMIN:
02168 case OP_ALT:
02169 return true;
02170
02171 }
02172 }
02173
02174 return true;
02175 }
02176
02177
02178
02179
02180
02181
02182
02183
02184
02185
02186
02187
02188
02189
02190
02191
02192
02193
02194
02195
02196
02197 static bool
02198 could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
02199 bool utf8)
02200 {
02201 while (bcptr != NULL && bcptr->current >= code)
02202 {
02203 if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return false;
02204 bcptr = bcptr->outer;
02205 }
02206 return true;
02207 }
02208
02209
02210
02211
02212
02213
02214
02215
02216
02217
02218
02219
02220
02221
02222
02223
02224
02225
02226
02227
02228 static bool
02229 check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
02230 {
02231 int terminator;
02232 terminator = *(++ptr);
02233 if (*(++ptr) == '^') ptr++;
02234 while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
02235 if (*ptr == terminator && ptr[1] == ']')
02236 {
02237 *endptr = ptr;
02238 return true;
02239 }
02240 return false;
02241 }
02242
02243
02244
02245
02246
02247
02248
02249
02250
02251
02252
02253
02254
02255
02256
02257
02258
02259
02260 static int
02261 check_posix_name(const uschar *ptr, int len)
02262 {
02263 register int yield = 0;
02264 while (posix_name_lengths[yield] != 0)
02265 {
02266 if (len == posix_name_lengths[yield] &&
02267 strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;
02268 yield++;
02269 }
02270 return -1;
02271 }
02272
02273
02274
02275
02276
02277
02278
02279
02280
02281
02282
02283
02284
02285
02286
02287
02288
02289
02290
02291
02292
02293
02294
02295
02296
02297 static void
02298 adjust_recurse(uschar *group, int adjust, bool utf8, compile_data *cd)
02299 {
02300 uschar *ptr = group;
02301 while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)
02302 {
02303 int offset = GET(ptr, 1);
02304 if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);
02305 ptr += 1 + LINK_SIZE;
02306 }
02307 }
02308
02309
02310
02311
02312
02313
02314
02315
02316
02317
02318
02319
02320
02321
02322
02323
02324
02325
02326
02327
02328
02329
02330
02331
02332
02333
02334 static bool
02335 compile_branch(int *optionsptr, int *brackets, uschar **codeptr,
02336 const uschar **ptrptr, const char **errorptr, int *firstbyteptr,
02337 int *reqbyteptr, branch_chain *bcptr, compile_data *cd)
02338 {
02339 int repeat_type, op_type;
02340 int repeat_min = 0, repeat_max = 0;
02341 int bravalue = 0;
02342 int length;
02343 int greedy_default, greedy_non_default;
02344 int firstbyte, reqbyte;
02345 int zeroreqbyte, zerofirstbyte;
02346 int req_caseopt, reqvary, tempreqvary;
02347 int condcount = 0;
02348 int options = *optionsptr;
02349 register int c;
02350 register uschar *code = *codeptr;
02351 uschar *tempcode;
02352 bool inescq = false;
02353 bool groupsetfirstbyte = false;
02354 const uschar *ptr = *ptrptr;
02355 const uschar *tempptr;
02356 uschar *previous = NULL;
02357 uschar classa[32];
02358
02359 bool utf8 = false;
02360
02361
02362
02363 greedy_default = ((options & PCRE_UNGREEDY) != 0);
02364 greedy_non_default = greedy_default ^ 1;
02365
02366
02367
02368
02369
02370
02371
02372
02373
02374
02375
02376 firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET;
02377
02378
02379
02380
02381
02382
02383 req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
02384
02385
02386
02387 for (;; ptr++)
02388 {
02389 bool negate_class;
02390 bool possessive_quantifier;
02391 int class_charcount;
02392 int class_lastchar;
02393 int newoptions;
02394 int recno;
02395 int skipbytes;
02396 int subreqbyte;
02397 int subfirstbyte;
02398
02399 c = *ptr;
02400 if (inescq && c != 0) goto NORMAL_CHAR;
02401
02402 if ((options & PCRE_EXTENDED) != 0)
02403 {
02404 if ((cd->ctypes[c] & ctype_space) != 0) continue;
02405 if (c == '#')
02406 {
02407
02408
02409 while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
02410 if (c != 0) continue;
02411 }
02412 }
02413
02414 switch(c)
02415 {
02416
02417
02418 case 0:
02419 case '|':
02420 case ')':
02421 *firstbyteptr = firstbyte;
02422 *reqbyteptr = reqbyte;
02423 *codeptr = code;
02424 *ptrptr = ptr;
02425 return true;
02426
02427
02428
02429
02430 case '^':
02431 if ((options & PCRE_MULTILINE) != 0)
02432 {
02433 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
02434 }
02435 previous = NULL;
02436 *code++ = OP_CIRC;
02437 break;
02438
02439 case '$':
02440 previous = NULL;
02441 *code++ = OP_DOLL;
02442 break;
02443
02444
02445
02446
02447 case '.':
02448 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
02449 zerofirstbyte = firstbyte;
02450 zeroreqbyte = reqbyte;
02451 previous = code;
02452 *code++ = OP_ANY;
02453 break;
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467 case '[':
02468 previous = code;
02469
02470
02471
02472
02473 if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
02474 check_posix_syntax(ptr, &tempptr, cd))
02475 {
02476 *errorptr = (ptr[1] == ':')? ERR13 : ERR31;
02477 goto FAILED;
02478 }
02479
02480
02481
02482 if ((c = *(++ptr)) == '^')
02483 {
02484 negate_class = true;
02485 c = *(++ptr);
02486 }
02487 else
02488 {
02489 negate_class = false;
02490 }
02491
02492
02493
02494
02495
02496 class_charcount = 0;
02497 class_lastchar = -1;
02498
02499
02500
02501
02502
02503
02504
02505 memset(classa, 0, 32 * sizeof(uschar));
02506
02507
02508
02509
02510
02511
02512
02513 do
02514 {
02515
02516
02517
02518 if (inescq)
02519 {
02520 if (c == '\\' && ptr[1] == 'E')
02521 {
02522 inescq = false;
02523 ptr++;
02524 continue;
02525 }
02526 else goto LONE_SINGLE_CHARACTER;
02527 }
02528
02529
02530
02531
02532
02533
02534
02535 if (c == '[' &&
02536 (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
02537 check_posix_syntax(ptr, &tempptr, cd))
02538 {
02539 bool local_negate = false;
02540 int posix_class, i;
02541 register const uschar *cbits = cd->cbits;
02542
02543 if (ptr[1] != ':')
02544 {
02545 *errorptr = ERR31;
02546 goto FAILED;
02547 }
02548
02549 ptr += 2;
02550 if (*ptr == '^')
02551 {
02552 local_negate = true;
02553 ptr++;
02554 }
02555
02556 posix_class = check_posix_name(ptr, tempptr - ptr);
02557 if (posix_class < 0)
02558 {
02559 *errorptr = ERR30;
02560 goto FAILED;
02561 }
02562
02563
02564
02565
02566
02567 if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
02568 posix_class = 0;
02569
02570
02571
02572
02573
02574
02575 posix_class *= 3;
02576 for (i = 0; i < 3; i++)
02577 {
02578 bool blankclass = strncmp((char *)ptr, "blank", 5) == 0;
02579 int taboffset = posix_class_maps[posix_class + i];
02580 if (taboffset < 0) break;
02581 if (local_negate)
02582 {
02583 for (c = 0; c < 32; c++) classa[c] |= ~cbits[c+taboffset];
02584 if (blankclass) classa[1] |= 0x3c;
02585 }
02586 else
02587 {
02588 for (c = 0; c < 32; c++) classa[c] |= cbits[c+taboffset];
02589 if (blankclass) classa[1] &= ~0x3c;
02590 }
02591 }
02592
02593 ptr = tempptr + 1;
02594 class_charcount = 10;
02595 continue;
02596 }
02597
02598
02599
02600
02601
02602
02603
02604
02605
02606 if (c == '\\')
02607 {
02608 c = check_escape(&ptr, errorptr, *brackets, options, true);
02609 if (-c == ESC_b) c = '\b';
02610
02611 if (-c == ESC_Q)
02612 {
02613 if (ptr[1] == '\\' && ptr[2] == 'E')
02614 {
02615 ptr += 2;
02616 }
02617 else inescq = true;
02618 continue;
02619 }
02620
02621 else if (c < 0)
02622 {
02623 register const uschar *cbits = cd->cbits;
02624 class_charcount = 10;
02625 switch (-c)
02626 {
02627 case ESC_d:
02628 for (c = 0; c < 32; c++) classa[c] |= cbits[c+cbit_digit];
02629 continue;
02630
02631 case ESC_D:
02632 for (c = 0; c < 32; c++) classa[c] |= ~cbits[c+cbit_digit];
02633 continue;
02634
02635 case ESC_w:
02636 for (c = 0; c < 32; c++) classa[c] |= cbits[c+cbit_word];
02637 continue;
02638
02639 case ESC_W:
02640 for (c = 0; c < 32; c++) classa[c] |= ~cbits[c+cbit_word];
02641 continue;
02642
02643 case ESC_s:
02644 for (c = 0; c < 32; c++) classa[c] |= cbits[c+cbit_space];
02645 classa[1] &= ~0x08;
02646 continue;
02647
02648 case ESC_S:
02649 for (c = 0; c < 32; c++) classa[c] |= ~cbits[c+cbit_space];
02650 classa[1] |= 0x08;
02651 continue;
02652
02653
02654
02655
02656
02657 default:
02658 if ((options & PCRE_EXTRA) != 0)
02659 {
02660 *errorptr = ERR7;
02661 goto FAILED;
02662 }
02663 c = *ptr;
02664 }
02665 }
02666
02667
02668
02669
02670 }
02671
02672
02673
02674
02675
02676 if (ptr[1] == '-' && ptr[2] != ']')
02677 {
02678 int d;
02679 ptr += 2;
02680
02681 d = *ptr;
02682
02683
02684
02685
02686
02687 if (d == '\\')
02688 {
02689 const uschar *oldptr = ptr;
02690 d = check_escape(&ptr, errorptr, *brackets, options, true);
02691
02692
02693
02694 if (d < 0)
02695 {
02696 if (d == -ESC_b) d = '\b'; else
02697 {
02698 ptr = oldptr - 2;
02699 goto LONE_SINGLE_CHARACTER;
02700 }
02701 }
02702 }
02703
02704
02705
02706 if (d < c)
02707 {
02708 *errorptr = ERR8;
02709 goto FAILED;
02710 }
02711
02712
02713
02714
02715
02716
02717
02718
02719
02720
02721 for (; c <= d; c++)
02722 {
02723 classa[c/8] |= (1 << (c&7));
02724 if ((options & PCRE_CASELESS) != 0)
02725 {
02726 int uc = cd->fcc[c];
02727 classa[uc/8] |= (1 << (uc&7));
02728 }
02729 class_charcount++;
02730 class_lastchar = c;
02731 }
02732
02733 continue;
02734 }
02735
02736
02737
02738
02739 LONE_SINGLE_CHARACTER:
02740
02741
02742 {
02743 classa[c/8] |= (1 << (c&7));
02744 if ((options & PCRE_CASELESS) != 0)
02745 {
02746 c = cd->fcc[c];
02747 classa[c/8] |= (1 << (c&7));
02748 }
02749 class_charcount++;
02750 class_lastchar = c;
02751 }
02752 }
02753
02754
02755
02756
02757 while ((c = *(++ptr)) != ']' || inescq);
02758
02759
02760
02761
02762
02763
02764
02765
02766
02767
02768
02769
02770 if (class_charcount == 1)
02771 {
02772 zeroreqbyte = reqbyte;
02773 if (negate_class)
02774 {
02775 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
02776 zerofirstbyte = firstbyte;
02777 *code++ = OP_NOT;
02778 }
02779 else
02780 {
02781 if (firstbyte == REQ_UNSET)
02782 {
02783 zerofirstbyte = REQ_NONE;
02784 firstbyte = class_lastchar | req_caseopt;
02785 }
02786 else
02787 {
02788 zerofirstbyte = firstbyte;
02789 reqbyte = class_lastchar | req_caseopt | cd->req_varyopt;
02790 }
02791 *code++ = OP_CHARS;
02792 *code++ = 1;
02793 }
02794 *code++ = class_lastchar;
02795 break;
02796 }
02797
02798
02799
02800
02801
02802 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
02803 zerofirstbyte = firstbyte;
02804 zeroreqbyte = reqbyte;
02805
02806
02807
02808
02809
02810
02811
02812
02813
02814
02815
02816 if (negate_class)
02817 {
02818 *code++ = OP_NCLASS;
02819 for (c = 0; c < 32; c++) code[c] = ~classa[c];
02820 }
02821 else
02822 {
02823 *code++ = OP_CLASS;
02824 memcpy(code, classa, 32);
02825 }
02826 code += 32;
02827 break;
02828
02829
02830
02831 case '{':
02832 if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
02833 ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);
02834 if (*errorptr != NULL) goto FAILED;
02835 goto REPEAT;
02836
02837 case '*':
02838 repeat_min = 0;
02839 repeat_max = -1;
02840 goto REPEAT;
02841
02842 case '+':
02843 repeat_min = 1;
02844 repeat_max = -1;
02845 goto REPEAT;
02846
02847 case '?':
02848 repeat_min = 0;
02849 repeat_max = 1;
02850
02851 REPEAT:
02852 if (previous == NULL)
02853 {
02854 *errorptr = ERR9;
02855 goto FAILED;
02856 }
02857
02858 if (repeat_min == 0)
02859 {
02860 firstbyte = zerofirstbyte;
02861 reqbyte = zeroreqbyte;
02862 }
02863
02864
02865
02866 reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
02867
02868 op_type = 0;
02869 possessive_quantifier = false;
02870
02871
02872
02873
02874 tempcode = previous;
02875
02876
02877
02878
02879
02880
02881
02882 if (ptr[1] == '+')
02883 {
02884 repeat_type = 0;
02885 possessive_quantifier = true;
02886 ptr++;
02887 }
02888 else if (ptr[1] == '?')
02889 {
02890 repeat_type = greedy_non_default;
02891 ptr++;
02892 }
02893 else repeat_type = greedy_default;
02894
02895
02896
02897
02898 if (*previous == OP_RECURSE)
02899 {
02900 memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
02901 code += 1 + LINK_SIZE;
02902 *previous = OP_BRA;
02903 PUT(previous, 1, code - previous);
02904 *code = OP_KET;
02905 PUT(code, 1, code - previous);
02906 code += 1 + LINK_SIZE;
02907 }
02908
02909
02910
02911
02912
02913
02914
02915
02916 if (*previous == OP_CHARS)
02917 {
02918
02919
02920
02921
02922
02923
02924
02925
02926
02927 {
02928 c = *(--code);
02929 if (code == previous + 2)
02930 {
02931 code = previous;
02932 if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt;
02933 }
02934 else
02935 {
02936 previous[1]--;
02937 tempcode = code;
02938 }
02939 }
02940
02941 goto OUTPUT_SINGLE_REPEAT;
02942 }
02943
02944
02945
02946
02947
02948
02949 else if (*previous == OP_NOT)
02950 {
02951 op_type = OP_NOTSTAR - OP_STAR;
02952 c = previous[1];
02953 code = previous;
02954 goto OUTPUT_SINGLE_REPEAT;
02955 }
02956
02957
02958
02959
02960
02961 else if (*previous < OP_EODN)
02962 {
02963 op_type = OP_TYPESTAR - OP_STAR;
02964 c = *previous;
02965 code = previous;
02966
02967 OUTPUT_SINGLE_REPEAT:
02968
02969
02970
02971
02972 if (repeat_max == 0) goto END_REPEAT;
02973
02974
02975
02976 repeat_type += op_type;
02977
02978
02979
02980
02981 if (repeat_min == 0)
02982 {
02983 if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
02984 else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
02985 else
02986 {
02987 *code++ = OP_UPTO + repeat_type;
02988 PUT2INC(code, 0, repeat_max);
02989 }
02990 }
02991
02992
02993
02994 else if (repeat_min == 1 && repeat_max == -1)
02995 *code++ = OP_PLUS + repeat_type;
02996
02997
02998
02999
03000 else
03001 {
03002 if (repeat_min != 1)
03003 {
03004 *code++ = OP_EXACT + op_type;
03005 PUT2INC(code, 0, repeat_min);
03006 }
03007
03008
03009
03010
03011
03012
03013
03014
03015 else if (*previous == OP_CHARS)
03016 {
03017 if (code == previous) code += 2; else
03018
03019
03020
03021
03022 previous[1]++;
03023 }
03024
03025
03026
03027
03028
03029 else if (*previous == OP_NOT) code++;
03030
03031
03032
03033
03034
03035 if (repeat_max < 0)
03036 {
03037 *code++ = c;
03038 *code++ = OP_STAR + repeat_type;
03039 }
03040
03041
03042
03043
03044 else if (repeat_max != repeat_min)
03045 {
03046 *code++ = c;
03047 repeat_max -= repeat_min;
03048 *code++ = OP_UPTO + repeat_type;
03049 PUT2INC(code, 0, repeat_max);
03050 }
03051 }
03052
03053
03054
03055
03056 *code++ = c;
03057 }
03058
03059
03060
03061
03062 else if (*previous == OP_CLASS ||
03063 *previous == OP_NCLASS ||
03064 *previous == OP_REF)
03065 {
03066 if (repeat_max == 0)
03067 {
03068 code = previous;
03069 goto END_REPEAT;
03070 }
03071 if (repeat_min == 0 && repeat_max == -1)
03072 *code++ = OP_CRSTAR + repeat_type;
03073 else if (repeat_min == 1 && repeat_max == -1)
03074 *code++ = OP_CRPLUS + repeat_type;
03075 else if (repeat_min == 0 && repeat_max == 1)
03076 *code++ = OP_CRQUERY + repeat_type;
03077 else
03078 {
03079 *code++ = OP_CRRANGE + repeat_type;
03080 PUT2INC(code, 0, repeat_min);
03081 if (repeat_max == -1) repeat_max = 0;
03082 PUT2INC(code, 0, repeat_max);
03083 }
03084 }
03085
03086
03087
03088
03089 else if (*previous >= OP_BRA || *previous == OP_ONCE ||
03090 *previous == OP_COND)
03091 {
03092 register int i;
03093 int ketoffset = 0;
03094 int len = code - previous;
03095 uschar *bralink = NULL;
03096
03097
03098
03099
03100
03101
03102
03103 if (repeat_max == -1)
03104 {
03105 register uschar *ket = previous;
03106 do ket += GET(ket, 1); while (*ket != OP_KET);
03107 ketoffset = code - ket;
03108 }
03109
03110
03111
03112
03113
03114
03115
03116
03117 if (repeat_min == 0)
03118 {
03119
03120
03121
03122 if (repeat_max == 0)
03123 {
03124 code = previous;
03125 goto END_REPEAT;
03126 }
03127
03128
03129
03130
03131
03132
03133
03134 if (repeat_max <= 1)
03135 {
03136 *code = OP_END;
03137 adjust_recurse(previous, 1, utf8, cd);
03138 memmove(previous+1, previous, len);
03139 code++;
03140 *previous++ = OP_BRAZERO + repeat_type;
03141 }
03142
03143
03144
03145
03146
03147
03148
03149
03150
03151 else
03152 {
03153 int offset;
03154 *code = OP_END;
03155 adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd);
03156 memmove(previous + 2 + LINK_SIZE, previous, len);
03157 code += 2 + LINK_SIZE;
03158 *previous++ = OP_BRAZERO + repeat_type;
03159 *previous++ = OP_BRA;
03160
03161
03162
03163
03164 offset = (bralink == NULL)? 0 : previous - bralink;
03165 bralink = previous;
03166 PUTINC(previous, 0, offset);
03167 }
03168
03169 repeat_max--;
03170 }
03171
03172
03173
03174
03175
03176
03177 else
03178 {
03179 if (repeat_min > 1)
03180 {
03181 if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
03182 for (i = 1; i < repeat_min; i++)
03183 {
03184 memcpy(code, previous, len);
03185 code += len;
03186 }
03187 }
03188 if (repeat_max > 0) repeat_max -= repeat_min;
03189 }
03190
03191
03192
03193
03194
03195
03196
03197 if (repeat_max >= 0)
03198 {
03199 for (i = repeat_max - 1; i >= 0; i--)
03200 {
03201 *code++ = OP_BRAZERO + repeat_type;
03202
03203
03204
03205
03206 if (i != 0)
03207 {
03208 int offset;
03209 *code++ = OP_BRA;
03210 offset = (bralink == NULL)? 0 : code - bralink;
03211 bralink = code;
03212 PUTINC(code, 0, offset);
03213 }
03214
03215 memcpy(code, previous, len);
03216 code += len;
03217 }
03218
03219
03220
03221
03222 while (bralink != NULL)
03223 {
03224 int oldlinkoffset;
03225 int offset = code - bralink + 1;
03226 uschar *bra = code - offset;
03227 oldlinkoffset = GET(bra, 1);
03228 bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
03229 *code++ = OP_KET;
03230 PUTINC(code, 0, offset);
03231 PUT(bra, 1, offset);
03232 }
03233 }
03234
03235
03236
03237
03238
03239
03240 else code[-ketoffset] = OP_KETRMAX + repeat_type;
03241 }
03242
03243
03244
03245 else
03246 {
03247 *errorptr = ERR11;
03248 goto FAILED;
03249 }
03250
03251
03252
03253
03254
03255
03256
03257 if (possessive_quantifier)
03258 {
03259 int len = code - tempcode;
03260 memmove(tempcode + 1+LINK_SIZE, tempcode, len);
03261 code += 1 + LINK_SIZE;
03262 len += 1 + LINK_SIZE;
03263 tempcode[0] = OP_ONCE;
03264 *code++ = OP_KET;
03265 PUTINC(code, 0, len);
03266 PUT(tempcode, 1, len);
03267 }
03268
03269
03270
03271
03272
03273 END_REPEAT:
03274 previous = NULL;
03275 cd->req_varyopt |= reqvary;
03276 break;
03277
03278
03279
03280
03281
03282
03283
03284
03285
03286 case '(':
03287 newoptions = options;
03288 skipbytes = 0;
03289
03290 if (*(++ptr) == '?')
03291 {
03292 int set, unset;
03293 int *optset;
03294
03295 switch (*(++ptr))
03296 {
03297 case '#':
03298 ptr++;
03299 while (*ptr != ')') ptr++;
03300 continue;
03301
03302 case ':':
03303 bravalue = OP_BRA;
03304 ptr++;
03305 break;
03306
03307 case '(':
03308 bravalue = OP_COND;
03309
03310
03311
03312 if (ptr[1] == 'R')
03313 {
03314 code[1+LINK_SIZE] = OP_CREF;
03315 PUT2(code, 2+LINK_SIZE, CREF_RECURSE);
03316 skipbytes = 3;
03317 ptr += 3;
03318 }
03319
03320
03321
03322
03323
03324 else if ((digitab[ptr[1]] && ctype_digit) != 0)
03325 {
03326 int condref;
03327 condref = *(++ptr) - '0';
03328 while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
03329 if (condref == 0)
03330 {
03331 *errorptr = ERR35;
03332 goto FAILED;
03333 }
03334 ptr++;
03335 code[1+LINK_SIZE] = OP_CREF;
03336 PUT2(code, 2+LINK_SIZE, condref);
03337 skipbytes = 3;
03338 }
03339
03340
03341 break;
03342
03343 case '=':
03344 bravalue = OP_ASSERT;
03345 ptr++;
03346 break;
03347
03348 case '!':
03349 bravalue = OP_ASSERT_NOT;
03350 ptr++;
03351 break;
03352
03353 case '<':
03354 switch (*(++ptr))
03355 {
03356 case '=':
03357 bravalue = OP_ASSERTBACK;
03358 ptr++;
03359 break;
03360
03361 case '!':
03362 bravalue = OP_ASSERTBACK_NOT;
03363 ptr++;
03364 break;
03365 }
03366 break;
03367
03368 case '>':
03369 bravalue = OP_ONCE;
03370 ptr++;
03371 break;
03372
03373 case 'C':
03374 *code++ = OP_CALLOUT;
03375 {
03376 int n = 0;
03377 while ((digitab[*(++ptr)] & ctype_digit) != 0)
03378 n = n * 10 + *ptr - '0';
03379 if (n > 255)
03380 {
03381 *errorptr = ERR38;
03382 goto FAILED;
03383 }
03384 *code++ = n;
03385 }
03386 previous = NULL;
03387 continue;
03388
03389 case 'P':
03390 if (*(++ptr) == '<')
03391 {
03392 int i, namelen;
03393 uschar *slot = cd->name_table;
03394 const uschar *name;
03395 name = ++ptr;
03396
03397 while (*ptr++ != '>');
03398 namelen = ptr - name - 1;
03399
03400 for (i = 0; i < cd->names_found; i++)
03401 {
03402 int crc = memcmp(name, slot+2, namelen);
03403 if (crc == 0)
03404 {
03405 if (slot[2+namelen] == 0)
03406 {
03407 *errorptr = ERR43;
03408 goto FAILED;
03409 }
03410 crc = -1;
03411 }
03412 if (crc < 0)
03413 {
03414 memmove(slot + cd->name_entry_size, slot,
03415 (cd->names_found - i) * cd->name_entry_size);
03416 break;
03417 }
03418 slot += cd->name_entry_size;
03419 }
03420
03421 PUT2(slot, 0, *brackets + 1);
03422 memcpy(slot + 2, name, namelen);
03423 slot[2+namelen] = 0;
03424 cd->names_found++;
03425 goto NUMBERED_GROUP;
03426 }
03427
03428 if (*ptr == '=' || *ptr == '>')
03429 {
03430 int i, namelen;
03431 int type = *ptr++;
03432 const uschar *name = ptr;
03433 uschar *slot = cd->name_table;
03434
03435 while (*ptr != ')') ptr++;
03436 namelen = ptr - name;
03437
03438 for (i = 0; i < cd->names_found; i++)
03439 {
03440 if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
03441 slot += cd->name_entry_size;
03442 }
03443 if (i >= cd->names_found)
03444 {
03445 *errorptr = ERR15;
03446 goto FAILED;
03447 }
03448
03449 recno = GET2(slot, 0);
03450
03451 if (type == '>') goto HANDLE_RECURSION;
03452
03453
03454
03455 previous = code;
03456 *code++ = OP_REF;
03457 PUT2INC(code, 0, recno);
03458 cd->backref_map |= (recno < 32)? (1 << recno) : 1;
03459 if (recno > cd->top_backref) cd->top_backref = recno;
03460 continue;
03461 }
03462
03463
03464 break;
03465
03466 case 'R':
03467 ptr++;
03468
03469
03470
03471
03472 case '0': case '1': case '2': case '3': case '4':
03473 case '5': case '6': case '7': case '8': case '9':
03474 {
03475 const uschar *called;
03476 recno = 0;
03477 while((digitab[*ptr] & ctype_digit) != 0)
03478 recno = recno * 10 + *ptr++ - '0';
03479
03480
03481
03482 HANDLE_RECURSION:
03483
03484 previous = code;
03485
03486
03487
03488
03489 *code = OP_END;
03490 called = (recno == 0)?
03491 cd->start_code : find_bracket(cd->start_code, recno);
03492
03493 if (called == NULL)
03494 {
03495 *errorptr = ERR15;
03496 goto FAILED;
03497 }
03498
03499
03500
03501
03502
03503 if (GET(called, 1) == 0 && could_be_empty(called, code, bcptr, utf8))
03504 {
03505 *errorptr = ERR40;
03506 goto FAILED;
03507 }
03508
03509
03510
03511 *code = OP_RECURSE;
03512 PUT(code, 1, called - cd->start_code);
03513 code += 1 + LINK_SIZE;
03514 }
03515 continue;
03516
03517
03518
03519 default:
03520 set = unset = 0;
03521 optset = &set;
03522
03523 while (*ptr != ')' && *ptr != ':')
03524 {
03525 switch (*ptr++)
03526 {
03527 case '-': optset = &unset; break;
03528
03529 case 'i': *optset |= PCRE_CASELESS; break;
03530 case 'm': *optset |= PCRE_MULTILINE; break;
03531 case 's': *optset |= PCRE_DOTALL; break;
03532 case 'x': *optset |= PCRE_EXTENDED; break;
03533 case 'U': *optset |= PCRE_UNGREEDY; break;
03534 case 'X': *optset |= PCRE_EXTRA; break;
03535 }
03536 }
03537
03538
03539
03540 newoptions = (options | set) & (~unset);
03541
03542
03543
03544
03545
03546
03547
03548
03549
03550
03551
03552
03553 if (*ptr == ')')
03554 {
03555 if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))
03556 {
03557 *code++ = OP_OPT;
03558 *code++ = newoptions & PCRE_IMS;
03559 }
03560
03561
03562
03563
03564
03565 *optionsptr = options = newoptions;
03566 greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
03567 greedy_non_default = greedy_default ^ 1;
03568 req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
03569
03570 previous = NULL;
03571 continue;
03572 }
03573
03574
03575
03576
03577
03578
03579 bravalue = OP_BRA;
03580 ptr++;
03581 }
03582 }
03583
03584
03585
03586
03587 else if ((options & PCRE_NO_AUTO_CAPTURE) != 0)
03588 {
03589 bravalue = OP_BRA;
03590 }
03591
03592
03593
03594
03595
03596 else
03597 {
03598 NUMBERED_GROUP:
03599 if (++(*brackets) > EXTRACT_BASIC_MAX)
03600 {
03601 bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1;
03602 code[1+LINK_SIZE] = OP_BRANUMBER;
03603 PUT2(code, 2+LINK_SIZE, *brackets);
03604 skipbytes = 3;
03605 }
03606 else bravalue = OP_BRA + *brackets;
03607 }
03608
03609
03610
03611
03612
03613
03614 previous = (bravalue >= OP_ONCE)? code : NULL;
03615 *code = bravalue;
03616 tempcode = code;
03617 tempreqvary = cd->req_varyopt;
03618
03619 if (!compile_regex(
03620 newoptions,
03621 options & PCRE_IMS,
03622 brackets,
03623 &tempcode,
03624 &ptr,
03625 errorptr,
03626 (bravalue == OP_ASSERTBACK ||
03627 bravalue == OP_ASSERTBACK_NOT),
03628 skipbytes,
03629 &subfirstbyte,
03630 &subreqbyte,
03631 bcptr,
03632 cd))
03633 goto FAILED;
03634
03635
03636
03637
03638
03639
03640
03641
03642
03643 else if (bravalue == OP_COND)
03644 {
03645 uschar *tc = code;
03646 condcount = 0;
03647
03648 do {
03649 condcount++;
03650 tc += GET(tc,1);
03651 }
03652 while (*tc != OP_KET);
03653
03654 if (condcount > 2)
03655 {
03656 *errorptr = ERR27;
03657 goto FAILED;
03658 }
03659
03660
03661
03662
03663 if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE;
03664 }
03665
03666
03667
03668
03669
03670
03671
03672 zeroreqbyte = reqbyte;
03673 zerofirstbyte = firstbyte;
03674 groupsetfirstbyte = false;
03675
03676 if (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_COND)
03677 {
03678
03679
03680
03681
03682
03683
03684 if (firstbyte == REQ_UNSET)
03685 {
03686 if (subfirstbyte >= 0)
03687 {
03688 firstbyte = subfirstbyte;
03689 groupsetfirstbyte = true;
03690 }
03691 else firstbyte = REQ_NONE;
03692 zerofirstbyte = REQ_NONE;
03693 }
03694
03695
03696
03697
03698
03699 else if (subfirstbyte >= 0 && subreqbyte < 0)
03700 subreqbyte = subfirstbyte | tempreqvary;
03701
03702
03703
03704
03705 if (subreqbyte >= 0) reqbyte = subreqbyte;
03706 }
03707
03708
03709
03710
03711
03712
03713
03714
03715
03716 else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte;
03717
03718
03719
03720 code = tempcode;
03721
03722
03723
03724 if (*ptr != ')')
03725 {
03726 *errorptr = ERR14;
03727 goto FAILED;
03728 }
03729 break;
03730
03731
03732
03733
03734
03735 case '\\':
03736 tempptr = ptr;
03737 c = check_escape(&ptr, errorptr, *brackets, options, false);
03738
03739
03740
03741
03742
03743
03744
03745
03746 if (c < 0)
03747 {
03748 if (-c == ESC_Q)
03749 {
03750 if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2;
03751 else inescq = true;
03752 continue;
03753 }
03754
03755
03756
03757
03758 if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z)
03759 firstbyte = REQ_NONE;
03760
03761
03762
03763 zerofirstbyte = firstbyte;
03764 zeroreqbyte = reqbyte;
03765
03766
03767
03768 if (-c >= ESC_REF)
03769 {
03770 int number = -c - ESC_REF;
03771 previous = code;
03772 *code++ = OP_REF;
03773 PUT2INC(code, 0, number);
03774 }
03775 else
03776 {
03777 previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
03778 *code++ = -c;
03779 }
03780 continue;
03781 }
03782
03783
03784
03785 ptr = tempptr;
03786 c = '\\';
03787
03788
03789
03790
03791
03792 NORMAL_CHAR:
03793 default:
03794 previous = code;
03795 *code = OP_CHARS;
03796 code += 2;
03797 length = 0;
03798
03799 do
03800 {
03801
03802
03803 if (inescq)
03804 {
03805 if (c == '\\' && ptr[1] == 'E')
03806 {
03807 inescq = false;
03808 ptr++;
03809 }
03810 else
03811 {
03812 *code++ = c;
03813 length++;
03814 }
03815 continue;
03816 }
03817
03818
03819
03820 if ((options & PCRE_EXTENDED) != 0)
03821 {
03822 if ((cd->ctypes[c] & ctype_space) != 0) continue;
03823 if (c == '#')
03824 {
03825
03826
03827 while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
03828 if (c == 0) break;
03829 continue;
03830 }
03831 }
03832
03833
03834
03835
03836
03837 if (c == '\\')
03838 {
03839 tempptr = ptr;
03840 c = check_escape(&ptr, errorptr, *brackets, options, false);
03841 if (c < 0) { ptr = tempptr; break; }
03842
03843
03844
03845
03846 }
03847
03848
03849
03850 *code++ = c;
03851 length++;
03852 }
03853
03854
03855
03856 while (length < MAXLIT && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
03857
03858
03859
03860
03861
03862
03863
03864
03865
03866
03867
03868 {
03869
03870
03871 if (firstbyte == REQ_UNSET)
03872 {
03873 if (length == 1)
03874 {
03875 zerofirstbyte = REQ_NONE;
03876 firstbyte = previous[2] | req_caseopt;
03877 zeroreqbyte = reqbyte;
03878 }
03879 else
03880 {
03881 zerofirstbyte = firstbyte = previous[2] | req_caseopt;
03882 zeroreqbyte = (length > 2)?
03883 (code[-2] | req_caseopt | cd->req_varyopt) : reqbyte;
03884 reqbyte = code[-1] | req_caseopt | cd->req_varyopt;
03885 }
03886 }
03887
03888
03889
03890 else
03891 {
03892 zerofirstbyte = firstbyte;
03893 zeroreqbyte = (length == 1)? reqbyte :
03894 code[-2] | req_caseopt | cd->req_varyopt;
03895 reqbyte = code[-1] | req_caseopt | cd->req_varyopt;
03896 }
03897 }
03898
03899
03900
03901 previous[1] = length;
03902 if (length < MAXLIT) ptr--;
03903 break;
03904 }
03905 }
03906
03907
03908
03909
03910
03911 FAILED:
03912 *ptrptr = ptr;
03913 return false;
03914 }
03915
03916
03917
03918
03919
03920
03921
03922
03923
03924
03925
03926
03927
03928
03929
03930
03931
03932
03933
03934
03935
03936
03937
03938
03939
03940
03941
03942
03943
03944
03945
03946
03947
03948 static bool
03949 compile_regex(int options, int oldims, int *brackets, uschar **codeptr,
03950 const uschar **ptrptr, const char **errorptr, bool lookbehind, int skipbytes,
03951 int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd)
03952 {
03953 const uschar *ptr = *ptrptr;
03954 uschar *code = *codeptr;
03955 uschar *last_branch = code;
03956 uschar *start_bracket = code;
03957 uschar *reverse_count = NULL;
03958 int firstbyte, reqbyte;
03959 int branchfirstbyte, branchreqbyte;
03960 branch_chain bc;
03961
03962 bc.outer = bcptr;
03963 bc.current = code;
03964
03965 firstbyte = reqbyte = REQ_UNSET;
03966
03967
03968
03969 PUT(code, 1, 0);
03970 code += 1 + LINK_SIZE + skipbytes;
03971
03972
03973
03974 for (;!MuxAlarm.bAlarmed;)
03975 {
03976
03977
03978 if ((options & PCRE_IMS) != oldims)
03979 {
03980 *code++ = OP_OPT;
03981 *code++ = options & PCRE_IMS;
03982 }
03983
03984
03985
03986 if (lookbehind)
03987 {
03988 *code++ = OP_REVERSE;
03989 reverse_count = code;
03990 PUTINC(code, 0, 0);
03991 }
03992
03993
03994
03995 if (!compile_branch(&options, brackets, &code, &ptr, errorptr,
03996 &branchfirstbyte, &branchreqbyte, &bc, cd))
03997 {
03998 *ptrptr = ptr;
03999 return false;
04000 }
04001
04002
04003
04004
04005 if (*last_branch != OP_ALT)
04006 {
04007 firstbyte = branchfirstbyte;
04008 reqbyte = branchreqbyte;
04009 }
04010
04011
04012
04013
04014
04015
04016 else
04017 {
04018
04019
04020
04021
04022 if (firstbyte >= 0 && firstbyte != branchfirstbyte)
04023 {
04024 if (reqbyte < 0) reqbyte = firstbyte;
04025 firstbyte = REQ_NONE;
04026 }
04027
04028
04029
04030
04031 if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)
04032 branchreqbyte = branchfirstbyte;
04033
04034
04035
04036 if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))
04037 reqbyte = REQ_NONE;
04038 else reqbyte |= branchreqbyte;
04039 }
04040
04041
04042
04043
04044
04045 if (lookbehind)
04046 {
04047 int length;
04048 *code = OP_END;
04049 length = find_fixedlength(last_branch, options);
04050 DPRINTF(("fixed length = %d\n", length));
04051 if (length < 0)
04052 {
04053 *errorptr = (length == -2)? ERR36 : ERR25;
04054 *ptrptr = ptr;
04055 return false;
04056 }
04057 PUT(reverse_count, 0, length);
04058 }
04059
04060
04061
04062
04063
04064
04065
04066
04067
04068
04069 if (*ptr != '|')
04070 {
04071 int length = code - last_branch;
04072 do
04073 {
04074 int prev_length = GET(last_branch, 1);
04075 PUT(last_branch, 1, length);
04076 length = prev_length;
04077 last_branch -= length;
04078 }
04079 while (length > 0);
04080
04081
04082
04083 *code = OP_KET;
04084 PUT(code, 1, code - start_bracket);
04085 code += 1 + LINK_SIZE;
04086
04087
04088
04089 if ((options & PCRE_IMS) != oldims && *ptr == ')')
04090 {
04091 *code++ = OP_OPT;
04092 *code++ = oldims;
04093 }
04094
04095
04096
04097 *codeptr = code;
04098 *ptrptr = ptr;
04099 *firstbyteptr = firstbyte;
04100 *reqbyteptr = reqbyte;
04101 return true;
04102 }
04103
04104
04105
04106
04107
04108
04109 *code = OP_ALT;
04110 PUT(code, 1, code - last_branch);
04111 bc.current = last_branch = code;
04112 code += 1 + LINK_SIZE;
04113 ptr++;
04114 }
04115 return false;
04116 }
04117
04118
04119
04120
04121
04122
04123
04124
04125
04126
04127
04128
04129
04130
04131
04132
04133
04134
04135
04136
04137
04138
04139
04140
04141
04142
04143
04144
04145
04146
04147
04148
04149
04150
04151
04152
04153
04154
04155
04156
04157
04158
04159 static bool
04160 is_anchored(register const uschar *code, int *options, unsigned int bracket_map,
04161 unsigned int backref_map)
04162 {
04163 do {
04164 const uschar *scode =
04165 first_significant_code(code + 1+LINK_SIZE, options, PCRE_MULTILINE);
04166 register int op = *scode;
04167
04168
04169
04170 if (op > OP_BRA)
04171 {
04172 int new_map;
04173 op -= OP_BRA;
04174 if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE);
04175 new_map = bracket_map | ((op < 32)? (1 << op) : 1);
04176 if (!is_anchored(scode, options, new_map, backref_map)) return false;
04177 }
04178
04179
04180
04181 else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
04182 {
04183 if (!is_anchored(scode, options, bracket_map, backref_map)) return false;
04184 }
04185
04186
04187
04188
04189 else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
04190 (*options & PCRE_DOTALL) != 0)
04191 {
04192 if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return false;
04193 }
04194
04195
04196
04197 else if (op != OP_SOD && op != OP_SOM &&
04198 ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
04199 return false;
04200 code += GET(code, 1);
04201 }
04202 while (*code == OP_ALT);
04203 return true;
04204 }
04205
04206
04207
04208
04209
04210
04211
04212
04213
04214
04215
04216
04217
04218
04219
04220
04221
04222
04223
04224
04225
04226
04227
04228
04229 static bool
04230 is_startline(const uschar *code, unsigned int bracket_map,
04231 unsigned int backref_map)
04232 {
04233 do {
04234 const uschar *scode = first_significant_code(code + 1+LINK_SIZE, NULL, 0);
04235 register int op = *scode;
04236
04237
04238
04239 if (op > OP_BRA)
04240 {
04241 int new_map;
04242 op -= OP_BRA;
04243 if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE);
04244 new_map = bracket_map | ((op < 32)? (1 << op) : 1);
04245 if (!is_startline(scode, new_map, backref_map)) return false;
04246 }
04247
04248
04249
04250 else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
04251 { if (!is_startline(scode, bracket_map, backref_map)) return false; }
04252
04253
04254
04255
04256 else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
04257 {
04258 if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return false;
04259 }
04260
04261
04262
04263 else if (op != OP_CIRC) return false;
04264 code += GET(code, 1);
04265 }
04266 while (*code == OP_ALT);
04267 return true;
04268 }
04269
04270
04271
04272
04273
04274
04275
04276
04277
04278
04279
04280
04281
04282
04283
04284
04285
04286
04287
04288
04289
04290
04291
04292 static int
04293 find_firstassertedchar(const uschar *code, int *options, bool inassert)
04294 {
04295 register int c = -1;
04296 do {
04297 int d;
04298 const uschar *scode =
04299 first_significant_code(code + 1+LINK_SIZE, options, PCRE_CASELESS);
04300 register int op = *scode;
04301
04302 if (op >= OP_BRA) op = OP_BRA;
04303
04304 switch(op)
04305 {
04306 default:
04307 return -1;
04308
04309 case OP_BRA:
04310 case OP_ASSERT:
04311 case OP_ONCE:
04312 case OP_COND:
04313 if ((d = find_firstassertedchar(scode, options, op == OP_ASSERT)) < 0)
04314 return -1;
04315 if (c < 0) c = d; else if (c != d) return -1;
04316 break;
04317
04318 case OP_EXACT:
04319 scode++;
04320
04321 case OP_CHARS:
04322 scode++;
04323
04324 case OP_PLUS:
04325 case OP_MINPLUS:
04326 if (!inassert) return -1;
04327 if (c < 0)
04328 {
04329 c = scode[1];
04330 if ((*options & PCRE_CASELESS) != 0) c |= REQ_CASELESS;
04331 }
04332 else if (c != scode[1]) return -1;
04333 break;
04334 }
04335
04336 code += GET(code, 1);
04337 }
04338 while (*code == OP_ALT);
04339 return c;
04340 }
04341
04342
04343
04344
04345
04346
04347
04348
04349
04350
04351
04352
04353
04354
04355
04356
04357
04358
04359
04360
04361
04362
04363 pcre *
04364 pcre_compile(const char *pattern, int options, const char **errorptr,
04365 int *erroroffset, const unsigned char *tables)
04366 {
04367 real_pcre *re;
04368 int length = 1 + LINK_SIZE;
04369 int runlength;
04370 int c, firstbyte, reqbyte;
04371 int bracount = 0;
04372 int branch_extra = 0;
04373 int branch_newextra;
04374 int item_count = -1;
04375 int name_count = 0;
04376 int max_name_size = 0;
04377 bool inescq = false;
04378 unsigned int brastackptr = 0;
04379 size_t size;
04380 uschar *code;
04381 const uschar *codestart;
04382 const uschar *ptr;
04383 compile_data compile_block;
04384 int brastack[BRASTACK_SIZE];
04385 uschar bralenstack[BRASTACK_SIZE];
04386
04387
04388
04389
04390 if (errorptr == NULL) return NULL;
04391 *errorptr = NULL;
04392
04393
04394
04395 if (erroroffset == NULL)
04396 {
04397 *errorptr = ERR16;
04398 return NULL;
04399 }
04400 *erroroffset = 0;
04401
04402
04403
04404 if ((options & PCRE_UTF8) != 0)
04405 {
04406 *errorptr = ERR32;
04407 return NULL;
04408 }
04409
04410 if ((options & ~PUBLIC_OPTIONS) != 0)
04411 {
04412 *errorptr = ERR17;
04413 return NULL;
04414 }
04415
04416
04417
04418 if (tables == NULL) tables = pcre_default_tables;
04419 compile_block.lcc = tables + lcc_offset;
04420 compile_block.fcc = tables + fcc_offset;
04421 compile_block.cbits = tables + cbits_offset;
04422 compile_block.ctypes = tables + ctypes_offset;
04423
04424
04425
04426
04427
04428
04429 compile_block.top_backref = 0;
04430 compile_block.backref_map = 0;
04431
04432
04433
04434 DPRINTF(("------------------------------------------------------------------\n"));
04435 DPRINTF(("%s\n", pattern));
04436
04437
04438
04439
04440
04441
04442
04443
04444 ptr = (const uschar *)(pattern - 1);
04445 while ((c = *(++ptr)) != 0)
04446 {
04447 int min, max;
04448 #if defined(WIN32) && (_MSC_VER == 1200) && defined(_M_IX86) && !defined(__INTEL_COMPILER)
04449
04450
04451
04452
04453
04454
04455
04456 volatile int class_optcount;
04457 #else
04458 int class_optcount;
04459 #endif
04460 int bracket_length;
04461 int duplength;
04462
04463
04464
04465 if (inescq) goto NORMAL_CHAR;
04466
04467
04468
04469 if ((options & PCRE_EXTENDED) != 0)
04470 {
04471 if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
04472 if (c == '#')
04473 {
04474
04475
04476 while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
04477 if (c == 0) break;
04478 continue;
04479 }
04480 }
04481
04482 item_count++;
04483
04484 switch(c)
04485 {
04486
04487
04488
04489
04490
04491 case '\\':
04492 {
04493 const uschar *save_ptr = ptr;
04494 c = check_escape(&ptr, errorptr, bracount, options, false);
04495 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
04496 if (c >= 0)
04497 {
04498 ptr = save_ptr;
04499 c = '\\';
04500 goto NORMAL_CHAR;
04501 }
04502 }
04503
04504
04505
04506 if (-c == ESC_Q)
04507 {
04508 inescq = true;
04509 continue;
04510 }
04511
04512
04513
04514 length++;
04515
04516
04517
04518
04519
04520 if (c <= -ESC_REF)
04521 {
04522 int refnum = -c - ESC_REF;
04523 compile_block.backref_map |= (refnum < 32)? (1 << refnum) : 1;
04524 if (refnum > compile_block.top_backref)
04525 compile_block.top_backref = refnum;
04526 length += 2;
04527 if (ptr[1] == '{' && is_counted_repeat(ptr+2))
04528 {
04529 ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
04530 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
04531 if ((min == 0 && (max == 1 || max == -1)) ||
04532 (min == 1 && max == -1))
04533 length++;
04534 else length += 5;
04535 if (ptr[1] == '?') ptr++;
04536 }
04537 }
04538 continue;
04539
04540 case '^':
04541 case '.':
04542 case '$':
04543 length++;
04544 continue;
04545
04546 case '*':
04547 case '+':
04548 case '?':
04549 length++;
04550 goto POSESSIVE;
04551
04552
04553
04554
04555 case '{':
04556 if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
04557 ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);
04558 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
04559
04560
04561
04562 if ((min == 0 && (max == 1 || max == -1)) ||
04563 (min == 1 && max == -1))
04564 length++;
04565
04566
04567
04568 else
04569 {
04570
04571
04572 {
04573 if (min != 1)
04574 {
04575 length--;
04576 if (min > 0) length += 4;
04577 }
04578
04579 length += (max > 0)? 4 : 2;
04580 }
04581 }
04582
04583 if (ptr[1] == '?') ptr++;
04584
04585 POSESSIVE:
04586 if (ptr[1] == '+')
04587 {
04588 ptr++;
04589 length += 2 + 2*LINK_SIZE;
04590 }
04591 continue;
04592
04593
04594
04595
04596
04597
04598 case '|':
04599 length += 1 + LINK_SIZE + branch_extra;
04600 continue;
04601
04602
04603
04604
04605
04606
04607
04608
04609
04610 case '[':
04611 class_optcount = 0;
04612
04613 if (*(++ptr) == '^') ptr++;
04614
04615
04616
04617 if (*ptr != 0) do
04618 {
04619
04620
04621 if (inescq)
04622 {
04623 if (*ptr != '\\' || ptr[1] != 'E') goto NON_SPECIAL_CHARACTER;
04624 inescq = false;
04625 ptr += 1;
04626 continue;
04627 }
04628
04629
04630
04631 if (*ptr == '\\')
04632 {
04633 int ch = check_escape(&ptr, errorptr, bracount, options, true);
04634 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
04635
04636
04637
04638 if (-ch == ESC_b) ch = '\b';
04639
04640
04641
04642 if (-ch == ESC_Q)
04643 {
04644 inescq = true;
04645 continue;
04646 }
04647
04648
04649
04650 if (ch >= 0)
04651 {
04652 class_optcount++;
04653 }
04654 else class_optcount = 10;
04655 }
04656
04657
04658
04659
04660 else if (*ptr == '[' && check_posix_syntax(ptr, &ptr, &compile_block))
04661 {
04662 ptr++;
04663 class_optcount = 10;
04664 }
04665
04666
04667
04668
04669 else
04670 {
04671 NON_SPECIAL_CHARACTER:
04672 class_optcount++;
04673
04674 }
04675 }
04676 while (*(++ptr) != 0 && (inescq || *ptr != ']'));
04677
04678 if (*ptr == 0)
04679 {
04680 *errorptr = ERR6;
04681 goto PCRE_ERROR_RETURN;
04682 }
04683
04684
04685
04686
04687
04688 if (class_optcount == 1) length += 3; else
04689 {
04690 length += 33;
04691
04692
04693
04694
04695 if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
04696 {
04697 ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
04698 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
04699 if ((min == 0 && (max == 1 || max == -1)) ||
04700 (min == 1 && max == -1))
04701 length++;
04702 else length += 5;
04703 if (ptr[1] == '+')
04704 {
04705 ptr++;
04706 length += 2 + 2*LINK_SIZE;
04707 }
04708 else if (ptr[1] == '?') ptr++;
04709 }
04710 }
04711 continue;
04712
04713
04714
04715 case '(':
04716 branch_newextra = 0;
04717 bracket_length = 1 + LINK_SIZE;
04718
04719
04720
04721 if (ptr[1] == '?')
04722 {
04723 int set, unset;
04724 int *optset;
04725
04726 switch (c = ptr[2])
04727 {
04728
04729 case '#':
04730 ptr += 3;
04731 while (*ptr != 0 && *ptr != ')') ptr++;
04732 if (*ptr == 0)
04733 {
04734 *errorptr = ERR18;
04735 goto PCRE_ERROR_RETURN;
04736 }
04737 continue;
04738
04739
04740
04741
04742
04743
04744 case ':':
04745 case '=':
04746 case '!':
04747 case '>':
04748 ptr += 2;
04749 break;
04750
04751
04752
04753
04754
04755
04756
04757
04758
04759 case 'R':
04760 ptr++;
04761
04762 case '0': case '1': case '2': case '3': case '4':
04763 case '5': case '6': case '7': case '8': case '9':
04764 ptr += 2;
04765 if (c != 'R')
04766 while ((digitab[*(++ptr)] & ctype_digit) != 0);
04767 if (*ptr != ')')
04768 {
04769 *errorptr = ERR29;
04770 goto PCRE_ERROR_RETURN;
04771 }
04772 length += 1 + LINK_SIZE;
04773
04774
04775
04776
04777
04778 if (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{')
04779 {
04780 length += 2 + 2 * LINK_SIZE;
04781 duplength = 5 + 3 * LINK_SIZE;
04782 goto HANDLE_QUANTIFIED_BRACKETS;
04783 }
04784 continue;
04785
04786
04787
04788
04789
04790 case 'C':
04791 ptr += 2;
04792 while ((digitab[*(++ptr)] & ctype_digit) != 0);
04793 if (*ptr != ')')
04794 {
04795 *errorptr = ERR39;
04796 goto PCRE_ERROR_RETURN;
04797 }
04798 length += 2;
04799 continue;
04800
04801
04802
04803 case 'P':
04804 ptr += 3;
04805 if (*ptr == '<')
04806 {
04807 const uschar *p;
04808 p = ++ptr;
04809 while ((compile_block.ctypes[*ptr] & ctype_word) != 0) ptr++;
04810 if (*ptr != '>')
04811 {
04812 *errorptr = ERR42;
04813 goto PCRE_ERROR_RETURN;
04814 }
04815 name_count++;
04816 if (ptr - p > max_name_size) max_name_size = (ptr - p);
04817 break;
04818 }
04819
04820 if (*ptr == '=' || *ptr == '>')
04821 {
04822 while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0);
04823 if (*ptr != ')')
04824 {
04825 *errorptr = ERR42;
04826 goto PCRE_ERROR_RETURN;
04827 }
04828 break;
04829 }
04830
04831
04832
04833 *errorptr = ERR41;
04834 goto PCRE_ERROR_RETURN;
04835
04836
04837
04838 case '<':
04839 ptr += 3;
04840 if (*ptr == '=' || *ptr == '!')
04841 {
04842 branch_newextra = 1 + LINK_SIZE;
04843 length += 1 + LINK_SIZE;
04844 break;
04845 }
04846 *errorptr = ERR24;
04847 goto PCRE_ERROR_RETURN;
04848
04849
04850
04851
04852
04853 case '(':
04854 if (ptr[3] == 'R' && ptr[4] == ')')
04855 {
04856 ptr += 4;
04857 length += 3;
04858 }
04859 else if ((digitab[ptr[3]] & ctype_digit) != 0)
04860 {
04861 ptr += 4;
04862 length += 3;
04863 while ((digitab[*ptr] & ctype_digit) != 0) ptr++;
04864 if (*ptr != ')')
04865 {
04866 *errorptr = ERR26;
04867 goto PCRE_ERROR_RETURN;
04868 }
04869 }
04870 else
04871 {
04872 ptr++;
04873 if (ptr[2] != '?' ||
04874 (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') )
04875 {
04876 ptr += 2;
04877 *errorptr = ERR28;
04878 goto PCRE_ERROR_RETURN;
04879 }
04880 }
04881 break;
04882
04883
04884
04885
04886
04887
04888 default:
04889 set = unset = 0;
04890 optset = &set;
04891 ptr += 2;
04892
04893 for (;; ptr++)
04894 {
04895 c = *ptr;
04896 switch (c)
04897 {
04898 case 'i':
04899 *optset |= PCRE_CASELESS;
04900 continue;
04901
04902 case 'm':
04903 *optset |= PCRE_MULTILINE;
04904 continue;
04905
04906 case 's':
04907 *optset |= PCRE_DOTALL;
04908 continue;
04909
04910 case 'x':
04911 *optset |= PCRE_EXTENDED;
04912 continue;
04913
04914 case 'X':
04915 *optset |= PCRE_EXTRA;
04916 continue;
04917
04918 case 'U':
04919 *optset |= PCRE_UNGREEDY;
04920 continue;
04921
04922 case '-':
04923 optset = &unset;
04924 continue;
04925
04926
04927
04928
04929
04930
04931
04932
04933
04934
04935
04936
04937
04938
04939 case ')':
04940 if (item_count == 0)
04941 {
04942 options = (options | set) & (~unset);
04943 set = unset = 0;
04944 item_count--;
04945 }
04946
04947
04948
04949
04950
04951
04952
04953
04954
04955
04956
04957
04958
04959
04960
04961
04962 case ':':
04963 if (((set|unset) & PCRE_IMS) != 0)
04964 {
04965 length += 4;
04966 branch_newextra = 2;
04967 if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
04968 }
04969 goto END_OPTIONS;
04970
04971
04972
04973 default:
04974 *errorptr = ERR12;
04975 goto PCRE_ERROR_RETURN;
04976 }
04977 }
04978
04979
04980
04981
04982
04983
04984
04985 END_OPTIONS:
04986 if (c == ')')
04987 {
04988 if (branch_newextra == 2 &&
04989 (branch_extra == 0 || branch_extra == 1+LINK_SIZE))
04990 branch_extra += branch_newextra;
04991 continue;
04992 }
04993
04994
04995
04996 }
04997 }
04998
04999
05000
05001
05002
05003
05004
05005 else if ((options & PCRE_NO_AUTO_CAPTURE) == 0)
05006 {
05007 bracount++;
05008 if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3;
05009 }
05010
05011
05012
05013
05014
05015
05016 if (brastackptr >= sizeof(brastack)/sizeof(int))
05017 {
05018 *errorptr = ERR19;
05019 goto PCRE_ERROR_RETURN;
05020 }
05021
05022 bralenstack[brastackptr] = branch_extra;
05023 branch_extra = branch_newextra;
05024
05025 brastack[brastackptr++] = length;
05026 length += bracket_length;
05027 continue;
05028
05029
05030
05031
05032
05033
05034
05035 case ')':
05036 length += 1 + LINK_SIZE;
05037 if (brastackptr > 0)
05038 {
05039 duplength = length - brastack[--brastackptr];
05040 branch_extra = bralenstack[brastackptr];
05041 }
05042 else duplength = 0;
05043
05044
05045
05046
05047
05048
05049 HANDLE_QUANTIFIED_BRACKETS:
05050
05051
05052
05053
05054 if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
05055 {
05056 ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
05057 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
05058 }
05059 else if (c == '*') { min = 0; max = -1; ptr++; }
05060 else if (c == '+') { min = 1; max = -1; ptr++; }
05061 else if (c == '?') { min = 0; max = 1; ptr++; }
05062 else { min = 1; max = 1; }
05063
05064
05065
05066
05067
05068
05069 if (min == 0)
05070 {
05071 length++;
05072 if (max > 0) length += (max - 1) * (duplength + 3 + 2*LINK_SIZE);
05073 }
05074
05075
05076
05077
05078
05079
05080
05081 else
05082 {
05083 length += (min - 1) * duplength;
05084 if (max > min)
05085 length += (max - min) * (duplength + 3 + 2*LINK_SIZE)
05086 - (2 + 2*LINK_SIZE);
05087 }
05088
05089
05090
05091 if (ptr[1] == '+')
05092 {
05093 ptr++;
05094 length += 2 + 2*LINK_SIZE;
05095 }
05096 continue;
05097
05098
05099
05100
05101
05102
05103 NORMAL_CHAR:
05104 default:
05105 length += 2;
05106 runlength = 0;
05107 do
05108 {
05109
05110
05111 if (inescq)
05112 {
05113 if (c == '\\' && ptr[1] == 'E')
05114 {
05115 inescq = false;
05116 ptr++;
05117 }
05118 else runlength++;
05119 continue;
05120 }
05121
05122
05123
05124 if ((options & PCRE_EXTENDED) != 0)
05125 {
05126 if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
05127 if (c == '#')
05128 {
05129
05130
05131 while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
05132 continue;
05133 }
05134 }
05135
05136
05137
05138
05139 if (c == '\\')
05140 {
05141 const uschar *saveptr = ptr;
05142 c = check_escape(&ptr, errorptr, bracount, options, false);
05143 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
05144 if (c < 0) { ptr = saveptr; break; }
05145
05146
05147
05148
05149
05150 }
05151
05152
05153
05154 runlength++;
05155 }
05156
05157
05158
05159 while (runlength < MAXLIT &&
05160 (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
05161
05162
05163
05164 if (runlength < MAXLIT) ptr--;
05165
05166
05167
05168
05169
05170
05171
05172 length += runlength;
05173 continue;
05174 }
05175 }
05176
05177 length += 2 + LINK_SIZE;
05178
05179 if (length > MAX_PATTERN_SIZE)
05180 {
05181 *errorptr = ERR20;
05182 return NULL;
05183 }
05184
05185
05186
05187
05188 size = length + sizeof(real_pcre) + name_count * (max_name_size + 3);
05189 re = static_cast<real_pcre *>(malloc(size));
05190
05191 if (re == NULL)
05192 {
05193 *errorptr = ERR21;
05194 return NULL;
05195 }
05196
05197
05198
05199 re->magic_number = MAGIC_NUMBER;
05200 re->size = size;
05201 re->options = options;
05202 re->tables = tables;
05203 re->name_entry_size = max_name_size + 3;
05204 re->name_count = name_count;
05205
05206
05207
05208
05209 compile_block.names_found = 0;
05210 compile_block.name_entry_size = max_name_size + 3;
05211 compile_block.name_table = (uschar *)re + sizeof(real_pcre);
05212 codestart = compile_block.name_table + re->name_entry_size * re->name_count;
05213 compile_block.start_code = codestart;
05214 compile_block.req_varyopt = 0;
05215
05216
05217
05218
05219
05220 ptr = (const uschar *)pattern;
05221 code = (uschar *)codestart;
05222 *code = OP_BRA;
05223 bracount = 0;
05224 (void)compile_regex(options, options & PCRE_IMS, &bracount, &code, &ptr,
05225 errorptr, false, 0, &firstbyte, &reqbyte, NULL, &compile_block);
05226 re->top_bracket = bracount;
05227 re->top_backref = compile_block.top_backref;
05228
05229
05230
05231 if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22;
05232
05233
05234
05235
05236 *code++ = OP_END;
05237
05238 if (code - codestart > length) *errorptr = ERR23;
05239
05240
05241
05242
05243 if (re->top_backref > re->top_bracket) *errorptr = ERR15;
05244
05245
05246
05247 if (*errorptr != NULL)
05248 {
05249 free(re);
05250 PCRE_ERROR_RETURN:
05251 *erroroffset = ptr - (const uschar *)pattern;
05252 return NULL;
05253 }
05254
05255
05256
05257
05258
05259
05260
05261
05262
05263
05264
05265 if ((options & PCRE_ANCHORED) == 0)
05266 {
05267 int temp_options = options;
05268 if (is_anchored(codestart, &temp_options, 0, compile_block.backref_map))
05269 re->options |= PCRE_ANCHORED;
05270 else
05271 {
05272 if (firstbyte < 0)
05273 firstbyte = find_firstassertedchar(codestart, &temp_options, false);
05274 if (firstbyte >= 0)
05275 {
05276 int ch = firstbyte & 255;
05277 re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
05278 compile_block.fcc[ch] == ch)? ch : firstbyte;
05279 re->options |= PCRE_FIRSTSET;
05280 }
05281 else if (is_startline(codestart, 0, compile_block.backref_map))
05282 re->options |= PCRE_STARTLINE;
05283 }
05284 }
05285
05286
05287
05288
05289
05290 if (reqbyte >= 0 &&
05291 ((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0))
05292 {
05293 int ch = reqbyte & 255;
05294 re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
05295 compile_block.fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
05296 re->options |= PCRE_REQCHSET;
05297 }
05298
05299 return (pcre *)re;
05300 }
05301
05302
05303
05304
05305
05306
05307
05308
05309
05310
05311
05312
05313
05314
05315
05316
05317
05318
05319
05320
05321 static bool
05322 match_ref(int offset, register const uschar *eptr, int length, match_data *md,
05323 unsigned long int ims)
05324 {
05325 const uschar *p = md->start_subject + md->offset_vector[offset];
05326
05327
05328
05329 if (length > md->end_subject - eptr) return false;
05330
05331
05332
05333 if ((ims & PCRE_CASELESS) != 0)
05334 {
05335 while (length-- > 0)
05336 if (md->lcc[*p++] != md->lcc[*eptr++]) return false;
05337 }
05338 else
05339 { while (length-- > 0) if (*p++ != *eptr++) return false; }
05340
05341 return true;
05342 }
05343
05344
05345
05346
05347
05348
05349
05350
05351
05352
05353
05354
05355
05356
05357
05358
05359
05360
05361
05362
05363
05364
05365
05366
05367
05368
05369
/* Indirection macros used by match() below for its parameter storage class,
 * its recursive calls and its returns:
 *
 *   REGISTER            expands to the `register` storage-class keyword.
 *   RMATCH(rx, ra..rg)  assigns to rx the result of calling match() with the
 *                       seven arguments ra..rg, in that order.  The expansion
 *                       is a bare assignment expression, so the caller
 *                       supplies the terminating semicolon.
 *   RRETURN(ra)         returns ra from the enclosing function.
 *
 * NOTE(review): presumably these exist so the call/return mechanism can be
 * replaced in one place; no alternative definition is visible here - confirm.
 */
05370 #define REGISTER register
05371 #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)
05372 #define RRETURN(ra) return ra
05373
05374
05375
05376
05377
05378
05379
05380
05381
05382
05383
05384
05385
05386
05387
05388
05389
05390
05391
05392
05393
05394
05395
05396
05397
05398
05399
05400
05401
05402
05403
05404
05405
05406
05407
05408
05409
05410
05411
05412 static int
05413 match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,
05414 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
05415 int flags)
05416 {
05417
05418
05419
05420
05421 register int rrc;
05422 register int i;
05423 register int c;
05424
05425
05426
05427
05428
05429
05430 #define fi i
05431 #define fc c
05432
05433 const uschar *callpat;
05434
05435 const uschar *data;
05436
05437 const uschar *next;
05438 const uschar *pp;
05439 const uschar *prev;
05440 const uschar *saved_eptr;
05441
05442 recursion_info new_recursive;
05443
05444 bool cur_is_word;
05445 bool condition;
05446 bool minimize;
05447 bool prev_is_word;
05448
05449 unsigned long int original_ims;
05450
05451 int ctype;
05452 int length;
05453 int max;
05454 int min;
05455 int number;
05456 int offset;
05457 int op;
05458 int save_capture_last;
05459 int save_offset1, save_offset2, save_offset3;
05460 int stacksave[REC_STACK_SAVE_MAX];
05461
05462 eptrblock newptrb;
05463
05464
05465
05466
05467
05468
05469
05470
05471 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
05472
05473 original_ims = ims;
05474
05475
05476
05477
05478
05479
05480 if ((flags & match_isgroup) != 0)
05481 {
05482 newptrb.epb_prev = eptrb;
05483 newptrb.epb_saved_eptr = eptr;
05484 eptrb = &newptrb;
05485 }
05486
05487
05488
05489 for (;!MuxAlarm.bAlarmed;)
05490 {
05491 op = *ecode;
05492 minimize = false;
05493
05494
05495
05496
05497
05498
05499
05500
05501
05502
05503
05504
05505
05506
05507
05508 if (op > OP_BRA)
05509 {
05510 number = op - OP_BRA;
05511
05512
05513
05514
05515 if (number > EXTRACT_BASIC_MAX)
05516 number = GET2(ecode, 2+LINK_SIZE);
05517 offset = number << 1;
05518
05519 if (offset < md->offset_max)
05520 {
05521 save_offset1 = md->offset_vector[offset];
05522 save_offset2 = md->offset_vector[offset+1];
05523 save_offset3 = md->offset_vector[md->offset_end - number];
05524 save_capture_last = md->capture_last;
05525
05526 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
05527 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
05528
05529 do
05530 {
05531 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
05532 match_isgroup);
05533 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05534 md->capture_last = save_capture_last;
05535 ecode += GET(ecode, 1);
05536 }
05537 while (*ecode == OP_ALT);
05538
05539 DPRINTF(("bracket %d failed\n", number));
05540
05541 md->offset_vector[offset] = save_offset1;
05542 md->offset_vector[offset+1] = save_offset2;
05543 md->offset_vector[md->offset_end - number] = save_offset3;
05544
05545 RRETURN(MATCH_NOMATCH);
05546 }
05547
05548
05549
05550 else op = OP_BRA;
05551 }
05552
05553
05554
05555 switch(op)
05556 {
05557 case OP_BRA:
05558 DPRINTF(("start bracket 0\n"));
05559 do
05560 {
05561 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
05562 match_isgroup);
05563 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05564 ecode += GET(ecode, 1);
05565 }
05566 while (*ecode == OP_ALT);
05567 DPRINTF(("bracket 0 failed\n"));
05568 RRETURN(MATCH_NOMATCH);
05569
05570
05571
05572
05573
05574
05575 case OP_COND:
05576 if (ecode[LINK_SIZE+1] == OP_CREF)
05577 {
05578 offset = GET2(ecode, LINK_SIZE+2) << 1;
05579 condition = (offset == CREF_RECURSE * 2)?
05580 (md->recursive != NULL) :
05581 (offset < offset_top && md->offset_vector[offset] >= 0);
05582 RMATCH(rrc, eptr, ecode + (condition?
05583 (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),
05584 offset_top, md, ims, eptrb, match_isgroup);
05585 RRETURN(rrc);
05586 }
05587
05588
05589
05590
05591 else
05592 {
05593 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
05594 match_condassert | match_isgroup);
05595 if (rrc == MATCH_MATCH)
05596 {
05597 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);
05598 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
05599 }
05600 else if (rrc != MATCH_NOMATCH)
05601 {
05602 RRETURN(rrc);
05603 }
05604 else ecode += GET(ecode, 1);
05605 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
05606 match_isgroup);
05607 RRETURN(rrc);
05608 }
05609
05610
05611
05612
05613
05614 case OP_CREF:
05615 case OP_BRANUMBER:
05616 ecode += 3;
05617 break;
05618
05619
05620
05621
05622 case OP_END:
05623 if (md->recursive != NULL && md->recursive->group_num == 0)
05624 {
05625 recursion_info *rec = md->recursive;
05626 DPRINTF(("Hit the end in a (?0) recursion\n"));
05627 md->recursive = rec->prevrec;
05628 memmove(md->offset_vector, rec->offset_save,
05629 rec->saved_max * sizeof(int));
05630 md->start_match = rec->save_start;
05631 ims = original_ims;
05632 ecode = rec->after_call;
05633 break;
05634 }
05635
05636
05637
05638
05639 if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);
05640 md->end_match_ptr = eptr;
05641 md->end_offset_top = offset_top;
05642 RRETURN(MATCH_MATCH);
05643
05644
05645
05646 case OP_OPT:
05647 ims = ecode[1];
05648 ecode += 2;
05649 DPRINTF(("ims set to %02lx\n", ims));
05650 break;
05651
05652
05653
05654
05655
05656
05657
05658 case OP_ASSERT:
05659 case OP_ASSERTBACK:
05660 do
05661 {
05662 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
05663 match_isgroup);
05664 if (rrc == MATCH_MATCH) break;
05665 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05666 ecode += GET(ecode, 1);
05667 }
05668 while (*ecode == OP_ALT);
05669 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
05670
05671
05672
05673 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
05674
05675
05676
05677
05678 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
05679 ecode += 1 + LINK_SIZE;
05680 offset_top = md->end_offset_top;
05681 continue;
05682
05683
05684
05685 case OP_ASSERT_NOT:
05686 case OP_ASSERTBACK_NOT:
05687 do
05688 {
05689 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
05690 match_isgroup);
05691 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
05692 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05693 ecode += GET(ecode,1);
05694 }
05695 while (*ecode == OP_ALT);
05696
05697 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
05698
05699 ecode += 1 + LINK_SIZE;
05700 continue;
05701
05702
05703
05704
05705
05706
05707 case OP_REVERSE:
05708
05709
05710
05711 {
05712 eptr -= GET(ecode,1);
05713 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
05714 }
05715
05716
05717
05718 ecode += 1 + LINK_SIZE;
05719 break;
05720
05721
05722
05723
05724
05725 case OP_CALLOUT:
05726 if (pcre_callout != NULL)
05727 {
05728 pcre_callout_block cb;
05729 cb.version = 0;
05730 cb.callout_number = ecode[1];
05731 cb.offset_vector = md->offset_vector;
05732 cb.subject = (const char *)md->start_subject;
05733 cb.subject_length = md->end_subject - md->start_subject;
05734 cb.start_match = md->start_match - md->start_subject;
05735 cb.current_position = eptr - md->start_subject;
05736 cb.capture_top = offset_top/2;
05737 cb.capture_last = md->capture_last;
05738 cb.callout_data = md->callout_data;
05739 if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
05740 if (rrc < 0) RRETURN(rrc);
05741 }
05742 ecode += 2;
05743 break;
05744
05745
05746
05747
05748
05749
05750
05751
05752
05753
05754
05755
05756
05757
05758
05759
05760
05761
05762
05763
05764 case OP_RECURSE:
05765 {
05766 callpat = md->start_code + GET(ecode, 1);
05767 new_recursive.group_num = *callpat - OP_BRA;
05768
05769
05770
05771
05772 if (new_recursive.group_num > EXTRACT_BASIC_MAX)
05773 new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);
05774
05775
05776
05777 new_recursive.prevrec = md->recursive;
05778 md->recursive = &new_recursive;
05779
05780
05781
05782 ecode += 1 + LINK_SIZE;
05783 new_recursive.after_call = ecode;
05784
05785
05786
05787 new_recursive.saved_max = md->offset_end;
05788 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
05789 new_recursive.offset_save = stacksave;
05790 else
05791 {
05792 new_recursive.offset_save =
05793 static_cast<int *>(malloc(new_recursive.saved_max * sizeof(int)));
05794 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
05795 }
05796
05797 memcpy(new_recursive.offset_save, md->offset_vector,
05798 new_recursive.saved_max * sizeof(int));
05799 new_recursive.save_start = md->start_match;
05800 md->start_match = eptr;
05801
05802
05803
05804
05805 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
05806 do
05807 {
05808 RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,
05809 eptrb, match_isgroup);
05810 if (rrc == MATCH_MATCH)
05811 {
05812 md->recursive = new_recursive.prevrec;
05813 if (new_recursive.offset_save != stacksave)
05814 free(new_recursive.offset_save);
05815 RRETURN(MATCH_MATCH);
05816 }
05817 else if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05818
05819 md->recursive = &new_recursive;
05820 memcpy(md->offset_vector, new_recursive.offset_save,
05821 new_recursive.saved_max * sizeof(int));
05822 callpat += GET(callpat, 1);
05823 }
05824 while (*callpat == OP_ALT);
05825
05826 DPRINTF(("Recursion didn't match\n"));
05827 md->recursive = new_recursive.prevrec;
05828 if (new_recursive.offset_save != stacksave)
05829 free(new_recursive.offset_save);
05830 RRETURN(MATCH_NOMATCH);
05831 }
05832
05833
05834
05835
05836
05837
05838
05839
05840
05841 case OP_ONCE:
05842 {
05843 prev = ecode;
05844 saved_eptr = eptr;
05845
05846 do
05847 {
05848 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
05849 eptrb, match_isgroup);
05850 if (rrc == MATCH_MATCH) break;
05851 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05852 ecode += GET(ecode,1);
05853 }
05854 while (*ecode == OP_ALT);
05855
05856
05857
05858 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
05859
05860
05861
05862
05863 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
05864
05865 offset_top = md->end_offset_top;
05866 eptr = md->end_match_ptr;
05867
05868
05869
05870
05871
05872
05873
05874 if (*ecode == OP_KET || eptr == saved_eptr)
05875 {
05876 ecode += 1+LINK_SIZE;
05877 break;
05878 }
05879
05880
05881
05882
05883
05884
05885 if (ecode[1+LINK_SIZE] == OP_OPT)
05886 {
05887 ims = (ims & ~PCRE_IMS) | ecode[4];
05888 DPRINTF(("ims set to %02lx at group repeat\n", ims));
05889 }
05890
05891 if (*ecode == OP_KETRMIN)
05892 {
05893 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
05894 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05895 RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
05896 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05897 }
05898 else
05899 {
05900 RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
05901 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05902 RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
05903 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05904 }
05905 }
05906 RRETURN(MATCH_NOMATCH);
05907
05908
05909
05910
05911 case OP_ALT:
05912 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
05913 break;
05914
05915
05916
05917
05918
05919
05920
05921 case OP_BRAZERO:
05922 {
05923 next = ecode+1;
05924 RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);
05925 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05926 do next += GET(next,1); while (*next == OP_ALT);
05927 ecode = next + 1+LINK_SIZE;
05928 }
05929 break;
05930
05931 case OP_BRAMINZERO:
05932 {
05933 next = ecode+1;
05934 do next += GET(next,1); while (*next == OP_ALT);
05935 RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,
05936 match_isgroup);
05937 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05938 ecode++;
05939 }
05940 break;
05941
05942
05943
05944
05945
05946
05947 case OP_KET:
05948 case OP_KETRMIN:
05949 case OP_KETRMAX:
05950 {
05951 prev = ecode - GET(ecode, 1);
05952 saved_eptr = eptrb->epb_saved_eptr;
05953
05954
05955
05956 eptrb = eptrb->epb_prev;
05957
05958 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
05959 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
05960 *prev == OP_ONCE)
05961 {
05962 md->end_match_ptr = eptr;
05963 md->end_offset_top = offset_top;
05964 RRETURN(MATCH_MATCH);
05965 }
05966
05967
05968
05969
05970
05971 if (*prev != OP_COND)
05972 {
05973 number = *prev - OP_BRA;
05974
05975
05976
05977
05978 if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);
05979 offset = number << 1;
05980
05981
05982
05983
05984
05985
05986 if (number > 0)
05987 {
05988 md->capture_last = number;
05989 if (offset >= md->offset_max) md->offset_overflow = true; else
05990 {
05991 md->offset_vector[offset] =
05992 md->offset_vector[md->offset_end - number];
05993 md->offset_vector[offset+1] = eptr - md->start_subject;
05994 if (offset_top <= offset) offset_top = offset + 2;
05995 }
05996
05997
05998
05999
06000 if (md->recursive != NULL && md->recursive->group_num == number)
06001 {
06002 recursion_info *rec = md->recursive;
06003 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
06004 md->recursive = rec->prevrec;
06005 md->start_match = rec->save_start;
06006 memcpy(md->offset_vector, rec->offset_save,
06007 rec->saved_max * sizeof(int));
06008 ecode = rec->after_call;
06009 ims = original_ims;
06010 break;
06011 }
06012 }
06013 }
06014
06015
06016
06017
06018 ims = original_ims;
06019 DPRINTF(("ims reset to %02lx\n", ims));
06020
06021
06022
06023
06024
06025
06026
06027 if (*ecode == OP_KET || eptr == saved_eptr)
06028 {
06029 ecode += 1 + LINK_SIZE;
06030 break;
06031 }
06032
06033
06034
06035
06036 if (*ecode == OP_KETRMIN)
06037 {
06038 RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
06039 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06040 RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
06041 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06042 }
06043 else
06044 {
06045 RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
06046 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06047 RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
06048 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06049 }
06050 }
06051
06052 RRETURN(MATCH_NOMATCH);
06053
06054
06055
06056 case OP_CIRC:
06057 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
06058 if ((ims & PCRE_MULTILINE) != 0)
06059 {
06060 if (eptr != md->start_subject && eptr[-1] != NEWLINE)
06061 RRETURN(MATCH_NOMATCH);
06062 ecode++;
06063 break;
06064 }
06065
06066
06067
06068
06069 case OP_SOD:
06070 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
06071 ecode++;
06072 break;
06073
06074
06075
06076 case OP_SOM:
06077 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
06078 ecode++;
06079 break;
06080
06081
06082
06083
06084 case OP_DOLL:
06085 if ((ims & PCRE_MULTILINE) != 0)
06086 {
06087 if (eptr < md->end_subject)
06088 { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }
06089 else
06090 { if (md->noteol) RRETURN(MATCH_NOMATCH); }
06091 ecode++;
06092 break;
06093 }
06094 else
06095 {
06096 if (md->noteol) RRETURN(MATCH_NOMATCH);
06097 if (!md->endonly)
06098 {
06099 if (eptr < md->end_subject - 1 ||
06100 (eptr == md->end_subject - 1 && *eptr != NEWLINE))
06101 RRETURN(MATCH_NOMATCH);
06102 ecode++;
06103 break;
06104 }
06105 }
06106
06107
06108
06109
06110 case OP_EOD:
06111 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
06112 ecode++;
06113 break;
06114
06115
06116
06117 case OP_EODN:
06118 if (eptr < md->end_subject - 1 ||
06119 (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);
06120 ecode++;
06121 break;
06122
06123
06124
06125 case OP_NOT_WORD_BOUNDARY:
06126 case OP_WORD_BOUNDARY:
06127 {
06128
06129
06130
06131
06132
06133
06134
06135
06136 {
06137 prev_is_word = (eptr != md->start_subject) &&
06138 ((md->ctypes[eptr[-1]] & ctype_word) != 0);
06139 cur_is_word = (eptr < md->end_subject) &&
06140 ((md->ctypes[*eptr] & ctype_word) != 0);
06141 }
06142
06143
06144
06145 if ((*ecode++ == OP_WORD_BOUNDARY)?
06146 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
06147 RRETURN(MATCH_NOMATCH);
06148 }
06149 break;
06150
06151
06152
06153 case OP_ANY:
06154 if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)
06155 RRETURN(MATCH_NOMATCH);
06156 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
06157 ecode++;
06158 break;
06159
06160
06161
06162
06163 case OP_ANYBYTE:
06164 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
06165 ecode++;
06166 break;
06167
06168 case OP_NOT_DIGIT:
06169 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
06170 GETCHARINCTEST(c, eptr);
06171 if (
06172 (md->ctypes[c] & ctype_digit) != 0
06173 )
06174 RRETURN(MATCH_NOMATCH);
06175 ecode++;
06176 break;
06177
06178 case OP_DIGIT:
06179 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
06180 GETCHARINCTEST(c, eptr);
06181 if (
06182 (md->ctypes[c] & ctype_digit) == 0
06183 )
06184 RRETURN(MATCH_NOMATCH);
06185 ecode++;
06186 break;
06187
06188 case OP_NOT_WHITESPACE:
06189 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
06190 GETCHARINCTEST(c, eptr);
06191 if (
06192 (md->ctypes[c] & ctype_space) != 0
06193 )
06194 RRETURN(MATCH_NOMATCH);
06195 ecode++;
06196 break;
06197
06198 case OP_WHITESPACE:
06199 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
06200 GETCHARINCTEST(c, eptr);
06201 if (
06202 (md->ctypes[c] & ctype_space) == 0
06203 )
06204 RRETURN(MATCH_NOMATCH);
06205 ecode++;
06206 break;
06207
06208 case OP_NOT_WORDCHAR:
06209 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
06210 GETCHARINCTEST(c, eptr);
06211 if (
06212 (md->ctypes[c] & ctype_word) != 0
06213 )
06214 RRETURN(MATCH_NOMATCH);
06215 ecode++;
06216 break;
06217
06218 case OP_WORDCHAR:
06219 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
06220 GETCHARINCTEST(c, eptr);
06221 if (
06222 (md->ctypes[c] & ctype_word) == 0
06223 )
06224 RRETURN(MATCH_NOMATCH);
06225 ecode++;
06226 break;
06227
06228
06229
06230
06231
06232
06233
06234
06235
06236 case OP_REF:
06237 {
06238 offset = GET2(ecode, 1) << 1;
06239 ecode += 3;
06240
06241
06242
06243
06244
06245
06246 length = (offset >= offset_top || md->offset_vector[offset] < 0)?
06247 md->end_subject - eptr + 1 :
06248 md->offset_vector[offset+1] - md->offset_vector[offset];
06249
06250
06251
06252 switch (*ecode)
06253 {
06254 case OP_CRSTAR:
06255 case OP_CRMINSTAR:
06256 case OP_CRPLUS:
06257 case OP_CRMINPLUS:
06258 case OP_CRQUERY:
06259 case OP_CRMINQUERY:
06260 c = *ecode++ - OP_CRSTAR;
06261 minimize = (c & 1) != 0;
06262 min = rep_min[c];
06263 max = rep_max[c];
06264 if (max == 0) max = INT_MAX;
06265 break;
06266
06267 case OP_CRRANGE:
06268 case OP_CRMINRANGE:
06269 minimize = (*ecode == OP_CRMINRANGE);
06270 min = GET2(ecode, 1);
06271 max = GET2(ecode, 3);
06272 if (max == 0) max = INT_MAX;
06273 ecode += 5;
06274 break;
06275
06276 default:
06277 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
06278 eptr += length;
06279 continue;
06280 }
06281
06282
06283
06284
06285 if (length == 0) continue;
06286
06287
06288
06289
06290
06291 for (i = 1; i <= min; i++)
06292 {
06293 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
06294 eptr += length;
06295 }
06296
06297
06298
06299
06300 if (min == max) continue;
06301
06302
06303
06304 if (minimize)
06305 {
06306 for (fi = min;; fi++)
06307 {
06308 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06309 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06310 if (fi >= max || !match_ref(offset, eptr, length, md, ims))
06311 RRETURN(MATCH_NOMATCH);
06312 eptr += length;
06313 }
06314
06315 }
06316
06317
06318
06319 else
06320 {
06321 pp = eptr;
06322 for (i = min; i < max; i++)
06323 {
06324 if (!match_ref(offset, eptr, length, md, ims)) break;
06325 eptr += length;
06326 }
06327 while (eptr >= pp)
06328 {
06329 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06330 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06331 eptr -= length;
06332 }
06333 RRETURN(MATCH_NOMATCH);
06334 }
06335 }
06336
06337
06338
06339
06340
06341
06342
06343
06344
06345
06346
06347
06348
06349 case OP_NCLASS:
06350 case OP_CLASS:
06351 {
06352 data = ecode + 1;
06353 ecode += 33;
06354
06355 switch (*ecode)
06356 {
06357 case OP_CRSTAR:
06358 case OP_CRMINSTAR:
06359 case OP_CRPLUS:
06360 case OP_CRMINPLUS:
06361 case OP_CRQUERY:
06362 case OP_CRMINQUERY:
06363 c = *ecode++ - OP_CRSTAR;
06364 minimize = (c & 1) != 0;
06365 min = rep_min[c];
06366 max = rep_max[c];
06367 if (max == 0) max = INT_MAX;
06368 break;
06369
06370 case OP_CRRANGE:
06371 case OP_CRMINRANGE:
06372 minimize = (*ecode == OP_CRMINRANGE);
06373 min = GET2(ecode, 1);
06374 max = GET2(ecode, 3);
06375 if (max == 0) max = INT_MAX;
06376 ecode += 5;
06377 break;
06378
06379 default:
06380 min = max = 1;
06381 break;
06382 }
06383
06384
06385
06386
06387 {
06388 for (i = 1; i <= min; i++)
06389 {
06390 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
06391 c = *eptr++;
06392 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
06393 }
06394 }
06395
06396
06397
06398
06399 if (min == max) continue;
06400
06401
06402
06403
06404 if (minimize)
06405 {
06406
06407 {
06408 for (fi = min;; fi++)
06409 {
06410 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06411 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06412 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
06413 c = *eptr++;
06414 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
06415 }
06416 }
06417
06418 }
06419
06420
06421
06422 else
06423 {
06424 pp = eptr;
06425
06426
06427 {
06428 for (i = min; i < max; i++)
06429 {
06430 if (eptr >= md->end_subject) break;
06431 c = *eptr;
06432 if ((data[c/8] & (1 << (c&7))) == 0) break;
06433 eptr++;
06434 }
06435 while (eptr >= pp)
06436 {
06437 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06438 eptr--;
06439 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06440 }
06441 }
06442
06443 RRETURN(MATCH_NOMATCH);
06444 }
06445 }
06446
06447
06448
06449
06450
06451
06452
06453
06454
06455 case OP_CHARS:
06456 {
06457 register int slen = ecode[1];
06458 ecode += 2;
06459
06460 if (slen > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
06461 if ((ims & PCRE_CASELESS) != 0)
06462 {
06463 while (slen-- > 0)
06464 if (md->lcc[*ecode++] != md->lcc[*eptr++])
06465 RRETURN(MATCH_NOMATCH);
06466 }
06467 else
06468 {
06469 while (slen-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
06470 }
06471 }
06472 break;
06473
06474
06475
06476 case OP_EXACT:
06477 min = max = GET2(ecode, 1);
06478 ecode += 3;
06479 goto REPEATCHAR;
06480
06481 case OP_UPTO:
06482 case OP_MINUPTO:
06483 min = 0;
06484 max = GET2(ecode, 1);
06485 minimize = *ecode == OP_MINUPTO;
06486 ecode += 3;
06487 goto REPEATCHAR;
06488
06489 case OP_STAR:
06490 case OP_MINSTAR:
06491 case OP_PLUS:
06492 case OP_MINPLUS:
06493 case OP_QUERY:
06494 case OP_MINQUERY:
06495 c = *ecode++ - OP_STAR;
06496 minimize = (c & 1) != 0;
06497 min = rep_min[c];
06498 max = rep_max[c];
06499 if (max == 0) max = INT_MAX;
06500
06501
06502
06503
06504
06505 REPEATCHAR:
06506
06507
06508 {
06509 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
06510 fc = *ecode++;
06511 }
06512
06513
06514
06515
06516
06517
06518
06519
06520
06521
06522 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
06523 max, eptr));
06524
06525 if ((ims & PCRE_CASELESS) != 0)
06526 {
06527 fc = md->lcc[fc];
06528 for (i = 1; i <= min; i++)
06529 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
06530 if (min == max) continue;
06531 if (minimize)
06532 {
06533 for (fi = min;; fi++)
06534 {
06535 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06536 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06537 if (fi >= max || eptr >= md->end_subject ||
06538 fc != md->lcc[*eptr++])
06539 RRETURN(MATCH_NOMATCH);
06540 }
06541
06542 }
06543 else
06544 {
06545 pp = eptr;
06546 for (i = min; i < max; i++)
06547 {
06548 if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
06549 eptr++;
06550 }
06551 while (eptr >= pp)
06552 {
06553 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06554 eptr--;
06555 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06556 }
06557 RRETURN(MATCH_NOMATCH);
06558 }
06559
06560 }
06561
06562
06563
06564 else
06565 {
06566 for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
06567 if (min == max) continue;
06568 if (minimize)
06569 {
06570 for (fi = min;; fi++)
06571 {
06572 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06573 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06574 if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
06575 RRETURN(MATCH_NOMATCH);
06576 }
06577
06578 }
06579 else
06580 {
06581 pp = eptr;
06582 for (i = min; i < max; i++)
06583 {
06584 if (eptr >= md->end_subject || fc != *eptr) break;
06585 eptr++;
06586 }
06587 while (eptr >= pp)
06588 {
06589 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06590 eptr--;
06591 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06592 }
06593 RRETURN(MATCH_NOMATCH);
06594 }
06595 }
06596
06597
06598
06599
06600
06601 case OP_NOT:
06602 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
06603 ecode++;
06604 GETCHARINCTEST(c, eptr);
06605 if ((ims & PCRE_CASELESS) != 0)
06606 {
06607 c = md->lcc[c];
06608 if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
06609 }
06610 else
06611 {
06612 if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
06613 }
06614 break;
06615
06616
06617
06618
06619
06620
06621
06622
06623 case OP_NOTEXACT:
06624 min = max = GET2(ecode, 1);
06625 ecode += 3;
06626 goto REPEATNOTCHAR;
06627
06628 case OP_NOTUPTO:
06629 case OP_NOTMINUPTO:
06630 min = 0;
06631 max = GET2(ecode, 1);
06632 minimize = *ecode == OP_NOTMINUPTO;
06633 ecode += 3;
06634 goto REPEATNOTCHAR;
06635
06636 case OP_NOTSTAR:
06637 case OP_NOTMINSTAR:
06638 case OP_NOTPLUS:
06639 case OP_NOTMINPLUS:
06640 case OP_NOTQUERY:
06641 case OP_NOTMINQUERY:
06642 c = *ecode++ - OP_NOTSTAR;
06643 minimize = (c & 1) != 0;
06644 min = rep_min[c];
06645 max = rep_max[c];
06646 if (max == 0) max = INT_MAX;
06647
06648
06649
06650
06651
06652 REPEATNOTCHAR:
06653 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
06654 fc = *ecode++;
06655
06656
06657
06658
06659
06660
06661
06662
06663
06664 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
06665 max, eptr));
06666
06667 if ((ims & PCRE_CASELESS) != 0)
06668 {
06669 fc = md->lcc[fc];
06670
06671
06672
06673 {
06674 for (i = 1; i <= min; i++)
06675 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
06676 }
06677
06678 if (min == max) continue;
06679
06680 if (minimize)
06681 {
06682
06683 {
06684 for (fi = min;; fi++)
06685 {
06686 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06687 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06688 if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
06689 RRETURN(MATCH_NOMATCH);
06690 }
06691 }
06692
06693 }
06694
06695
06696
06697 else
06698 {
06699 pp = eptr;
06700
06701
06702 {
06703 for (i = min; i < max; i++)
06704 {
06705 if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
06706 eptr++;
06707 }
06708 while (eptr >= pp)
06709 {
06710 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06711 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06712 eptr--;
06713 }
06714 }
06715
06716 RRETURN(MATCH_NOMATCH);
06717 }
06718
06719 }
06720
06721
06722
06723 else
06724 {
06725
06726 {
06727 for (i = 1; i <= min; i++)
06728 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
06729 }
06730
06731 if (min == max) continue;
06732
06733 if (minimize)
06734 {
06735
06736 {
06737 for (fi = min;; fi++)
06738 {
06739 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06740 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06741 if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
06742 RRETURN(MATCH_NOMATCH);
06743 }
06744 }
06745
06746 }
06747
06748
06749
06750 else
06751 {
06752 pp = eptr;
06753
06754
06755 {
06756 for (i = min; i < max; i++)
06757 {
06758 if (eptr >= md->end_subject || fc == *eptr) break;
06759 eptr++;
06760 }
06761 while (eptr >= pp)
06762 {
06763 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06764 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06765 eptr--;
06766 }
06767 }
06768
06769 RRETURN(MATCH_NOMATCH);
06770 }
06771 }
06772
06773
06774
06775
06776
06777
06778 case OP_TYPEEXACT:
06779 min = max = GET2(ecode, 1);
06780 minimize = true;
06781 ecode += 3;
06782 goto REPEATTYPE;
06783
06784 case OP_TYPEUPTO:
06785 case OP_TYPEMINUPTO:
06786 min = 0;
06787 max = GET2(ecode, 1);
06788 minimize = *ecode == OP_TYPEMINUPTO;
06789 ecode += 3;
06790 goto REPEATTYPE;
06791
06792 case OP_TYPESTAR:
06793 case OP_TYPEMINSTAR:
06794 case OP_TYPEPLUS:
06795 case OP_TYPEMINPLUS:
06796 case OP_TYPEQUERY:
06797 case OP_TYPEMINQUERY:
06798 c = *ecode++ - OP_TYPESTAR;
06799 minimize = (c & 1) != 0;
06800 min = rep_min[c];
06801 max = rep_max[c];
06802 if (max == 0) max = INT_MAX;
06803
06804
06805
06806
06807
06808 REPEATTYPE:
06809 ctype = *ecode++;
06810
06811
06812
06813
06814
06815
06816
06817
06818 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
06819 if (min > 0)
06820 {
06821
06822
06823
06824 switch(ctype)
06825 {
06826 case OP_ANY:
06827 if ((ims & PCRE_DOTALL) == 0)
06828 {
06829 for (i = 1; i <= min; i++)
06830 if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);
06831 }
06832 else eptr += min;
06833 break;
06834
06835 case OP_ANYBYTE:
06836 eptr += min;
06837 break;
06838
06839 case OP_NOT_DIGIT:
06840 for (i = 1; i <= min; i++)
06841 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
06842 break;
06843
06844 case OP_DIGIT:
06845 for (i = 1; i <= min; i++)
06846 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
06847 break;
06848
06849 case OP_NOT_WHITESPACE:
06850 for (i = 1; i <= min; i++)
06851 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
06852 break;
06853
06854 case OP_WHITESPACE:
06855 for (i = 1; i <= min; i++)
06856 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
06857 break;
06858
06859 case OP_NOT_WORDCHAR:
06860 for (i = 1; i <= min; i++)
06861 if ((md->ctypes[*eptr++] & ctype_word) != 0)
06862 RRETURN(MATCH_NOMATCH);
06863 break;
06864
06865 case OP_WORDCHAR:
06866 for (i = 1; i <= min; i++)
06867 if ((md->ctypes[*eptr++] & ctype_word) == 0)
06868 RRETURN(MATCH_NOMATCH);
06869 break;
06870 }
06871 }
06872
06873
06874
06875 if (min == max) continue;
06876
06877
06878
06879
06880 if (minimize)
06881 {
06882
06883 {
06884 for (fi = min;; fi++)
06885 {
06886 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
06887 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
06888 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
06889 c = *eptr++;
06890 switch(ctype)
06891 {
06892 case OP_ANY:
06893 if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);
06894 break;
06895
06896 case OP_ANYBYTE:
06897 break;
06898
06899 case OP_NOT_DIGIT:
06900 if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
06901 break;
06902
06903 case OP_DIGIT:
06904 if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
06905 break;
06906
06907 case OP_NOT_WHITESPACE:
06908 if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
06909 break;
06910
06911 case OP_WHITESPACE:
06912 if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
06913 break;
06914
06915 case OP_NOT_WORDCHAR:
06916 if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
06917 break;
06918
06919 case OP_WORDCHAR:
06920 if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
06921 break;
06922 }
06923 }
06924 }
06925
06926 }
06927
06928
06929
06930
06931
06932 else
06933 {
06934 pp = eptr;
06935
06936
06937 {
06938 switch(ctype)
06939 {
06940 case OP_ANY:
06941 if ((ims & PCRE_DOTALL) == 0)
06942 {
06943 for (i = min; i < max; i++)
06944 {
06945 if (eptr >= md->end_subject || *eptr == NEWLINE) break;
06946 eptr++;
06947 }
06948 break;
06949 }
06950
06951
06952 case OP_ANYBYTE:
06953 c = max - min;
06954 if (c > md->end_subject - eptr) c = md->end_subject - eptr;
06955 eptr += c;
06956 break;
06957
06958 case OP_NOT_DIGIT:
06959 for (i = min; i < max; i++)
06960 {
06961 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
06962 break;
06963 eptr++;
06964 }
06965 break;
06966
06967 case OP_DIGIT:
06968 for (i = min; i < max; i++)
06969 {
06970 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
06971 break;
06972 eptr++;
06973 }
06974 break;
06975
06976 case OP_NOT_WHITESPACE:
06977 for (i = min; i < max; i++)
06978 {
06979 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
06980 break;
06981 eptr++;
06982 }
06983 break;
06984
06985 case OP_WHITESPACE:
06986 for (i = min; i < max; i++)
06987 {
06988 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
06989 break;
06990 eptr++;
06991 }
06992 break;
06993
06994 case OP_NOT_WORDCHAR:
06995 for (i = min; i < max; i++)
06996 {
06997 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
06998 break;
06999 eptr++;
07000 }
07001 break;
07002
07003 case OP_WORDCHAR:
07004 for (i = min; i < max; i++)
07005 {
07006 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
07007 break;
07008 eptr++;
07009 }
07010 break;
07011 }
07012
07013
07014
07015 while (eptr >= pp)
07016 {
07017 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
07018 eptr--;
07019 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
07020 }
07021 }
07022
07023
07024
07025 RRETURN(MATCH_NOMATCH);
07026 }
07027
07028
07029
07030
07031
07032
07033
07034 default:
07035 DPRINTF(("Unknown opcode %d\n", *ecode));
07036 RRETURN(PCRE_ERROR_UNKNOWN_NODE);
07037 }
07038
07039
07040
07041
07042
07043 }
07044 RRETURN(MATCH_NOMATCH);
07045 }
07046
07047
07048
07049
07050
07051
07052
07053
07054
07055
07056 #undef fc
07057 #undef fi
07058
07059
07060
07061
07062
07063
07064
07065
07066
07067
07068
07069
07070
07071
07072
07073
07074
07075
07076
07077
07078
07079
07080
07081
07082
07083
07084
07085
07086
07087
07088 int
07089 pcre_exec(const pcre *external_re, const pcre_extra *extra_data,
07090 const char *subject, int length, int start_offset, int options, int *offsets,
07091 int offsetcount)
07092 {
07093 int rc, resetcount, ocount;
07094 int first_byte = -1;
07095 int req_byte = -1;
07096 int req_byte2 = -1;
07097 unsigned long int ims = 0;
07098 bool using_temporary_offsets = false;
07099 bool anchored;
07100 bool startline;
07101 bool first_byte_caseless = false;
07102 bool req_byte_caseless = false;
07103 match_data match_block;
07104 const uschar *start_bits = NULL;
07105 const uschar *start_match = (const uschar *)subject + start_offset;
07106 const uschar *end_subject;
07107 const uschar *req_byte_ptr = start_match - 1;
07108 const pcre_study_data *study;
07109 const real_pcre *re = (const real_pcre *)external_re;
07110
07111
07112
07113 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
07114 if (re == NULL || subject == NULL ||
07115 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
07116
07117
07118
07119
07120 study = NULL;
07121 match_block.match_limit = MATCH_LIMIT;
07122 match_block.callout_data = NULL;
07123
07124 if (extra_data != NULL)
07125 {
07126 register unsigned int flags = extra_data->flags;
07127 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
07128 study = (const pcre_study_data *)extra_data->study_data;
07129 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
07130 match_block.match_limit = extra_data->match_limit;
07131 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
07132 match_block.callout_data = extra_data->callout_data;
07133 }
07134
07135
07136
07137 if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
07138
07139 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
07140 startline = (re->options & PCRE_STARTLINE) != 0;
07141
07142 match_block.start_code =
07143 (const uschar *)re + sizeof(real_pcre) + re->name_count * re->name_entry_size;
07144 match_block.start_subject = (const uschar *)subject;
07145 match_block.start_offset = start_offset;
07146 match_block.end_subject = match_block.start_subject + length;
07147 end_subject = match_block.end_subject;
07148
07149 match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
07150 match_block.utf8 = (re->options & PCRE_UTF8) != 0;
07151
07152 match_block.notbol = (options & PCRE_NOTBOL) != 0;
07153 match_block.noteol = (options & PCRE_NOTEOL) != 0;
07154 match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
07155
07156 match_block.recursive = NULL;
07157
07158 match_block.lcc = re->tables + lcc_offset;
07159 match_block.ctypes = re->tables + ctypes_offset;
07160
07161
07162
07163
07164
07165
07166
07167
07168 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
07169
07170
07171
07172
07173
07174
07175 ocount = offsetcount - (offsetcount % 3);
07176
07177 if (re->top_backref > 0 && re->top_backref >= ocount/3)
07178 {
07179 ocount = re->top_backref * 3 + 3;
07180 match_block.offset_vector = static_cast<int *>(malloc(ocount * sizeof(int)));
07181 if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
07182 using_temporary_offsets = true;
07183 DPRINTF(("Got memory to hold back references\n"));
07184 }
07185 else match_block.offset_vector = offsets;
07186
07187 match_block.offset_end = ocount;
07188 match_block.offset_max = (2*ocount)/3;
07189 match_block.offset_overflow = false;
07190 match_block.capture_last = -1;
07191
07192
07193
07194
07195
07196 resetcount = 2 + re->top_bracket * 2;
07197 if (resetcount > offsetcount) resetcount = ocount;
07198
07199
07200
07201
07202
07203 if (match_block.offset_vector != NULL)
07204 {
07205 register int *iptr = match_block.offset_vector + ocount;
07206 register int *iend = iptr - resetcount/2 + 1;
07207 while (--iptr >= iend) *iptr = -1;
07208 }
07209
07210
07211
07212
07213
07214
07215
07216 if (!anchored)
07217 {
07218 if ((re->options & PCRE_FIRSTSET) != 0)
07219 {
07220 first_byte = re->first_byte & 255;
07221 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == true)
07222 first_byte = match_block.lcc[first_byte];
07223 }
07224 else
07225 if (!startline && study != NULL &&
07226 (study->options & PCRE_STUDY_MAPPED) != 0)
07227 start_bits = study->start_bits;
07228 }
07229
07230
07231
07232
07233 if ((re->options & PCRE_REQCHSET) != 0)
07234 {
07235 req_byte = re->req_byte & 255;
07236 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
07237 req_byte2 = (re->tables + fcc_offset)[req_byte];
07238 }
07239
07240
07241
07242
07243 do
07244 {
07245 register int *iptr = match_block.offset_vector;
07246 register int *iend = iptr + resetcount;
07247
07248
07249
07250 while (iptr < iend) *iptr++ = -1;
07251
07252
07253
07254 if (first_byte >= 0)
07255 {
07256 if (first_byte_caseless)
07257 while (start_match < end_subject &&
07258 match_block.lcc[*start_match] != first_byte)
07259 start_match++;
07260 else
07261 while (start_match < end_subject && *start_match != first_byte)
07262 start_match++;
07263 }
07264
07265
07266
07267 else if (startline)
07268 {
07269 if (start_match > match_block.start_subject + start_offset)
07270 {
07271 while (start_match < end_subject && start_match[-1] != NEWLINE)
07272 start_match++;
07273 }
07274 }
07275
07276
07277
07278 else if (start_bits != NULL)
07279 {
07280 while (start_match < end_subject)
07281 {
07282 register int c = *start_match;
07283 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
07284 }
07285 }
07286
07287
07288
07289
07290
07291
07292
07293
07294
07295
07296
07297
07298
07299
07300 if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
07301 {
07302 register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0);
07303
07304
07305
07306
07307 if (p > req_byte_ptr)
07308 {
07309 if (req_byte_caseless)
07310 {
07311 while (p < end_subject)
07312 {
07313 register int pp = *p++;
07314 if (pp == req_byte || pp == req_byte2) { p--; break; }
07315 }
07316 }
07317 else
07318 {
07319 while (p < end_subject)
07320 {
07321 if (*p++ == req_byte) { p--; break; }
07322 }
07323 }
07324
07325
07326
07327 if (p >= end_subject) break;
07328
07329
07330
07331
07332
07333 req_byte_ptr = p;
07334 }
07335 }
07336
07337
07338
07339
07340
07341
07342
07343
07344 match_block.start_match = start_match;
07345 match_block.match_call_count = 0;
07346
07347 rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,
07348 match_isgroup);
07349
07350 if (rc == MATCH_NOMATCH)
07351 {
07352 start_match++;
07353 continue;
07354 }
07355
07356 if (rc != MATCH_MATCH)
07357 {
07358 DPRINTF((">>>> error: returning %d\n", rc));
07359 return rc;
07360 }
07361
07362
07363
07364
07365 if (using_temporary_offsets)
07366 {
07367 if (offsetcount >= 4)
07368 {
07369 memcpy(offsets + 2, match_block.offset_vector + 2,
07370 (offsetcount - 2) * sizeof(int));
07371 DPRINTF(("Copied offsets from temporary memory\n"));
07372 }
07373 if (match_block.end_offset_top > offsetcount)
07374 match_block.offset_overflow = true;
07375
07376 DPRINTF(("Freeing temporary memory\n"));
07377 free(match_block.offset_vector);
07378 }
07379
07380 rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
07381
07382 if (offsetcount < 2) rc = 0; else
07383 {
07384 offsets[0] = start_match - match_block.start_subject;
07385 offsets[1] = match_block.end_match_ptr - match_block.start_subject;
07386 }
07387
07388 DPRINTF((">>>> returning %d\n", rc));
07389 return rc;
07390 }
07391
07392
07393
07394 while (!anchored && start_match <= end_subject);
07395
07396 if (using_temporary_offsets)
07397 {
07398 DPRINTF(("Freeing temporary memory\n"));
07399 free(match_block.offset_vector);
07400 }
07401
07402 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
07403
07404 return PCRE_ERROR_NOMATCH;
07405 }
07406
07407