Context Navigation

source: XMLIO_V2/external/src/POCO/Foundation.save/pcre_exec.c @ 80

Last change on this file since 80 was 80, checked in by ymipsl, 14 years ago
ajout lib externe
Property svn:eol-style set to `native`
File size: 147.5 KB

Line
1	/*************************************************
2	* Perl-Compatible Regular Expressions *
3	*************************************************/
4
5	/* PCRE is a library of functions to support regular expressions whose syntax
6	and semantics are as close as possible to those of the Perl 5 language.
7
8	Written by Philip Hazel
9	Copyright (c) 1997-2008 University of Cambridge
10
11	-----------------------------------------------------------------------------
12	Redistribution and use in source and binary forms, with or without
13	modification, are permitted provided that the following conditions are met:
14
15	* Redistributions of source code must retain the above copyright notice,
16	this list of conditions and the following disclaimer.
17
18	* Redistributions in binary form must reproduce the above copyright
19	notice, this list of conditions and the following disclaimer in the
20	documentation and/or other materials provided with the distribution.
21
22	* Neither the name of the University of Cambridge nor the names of its
23	contributors may be used to endorse or promote products derived from
24	this software without specific prior written permission.
25
26	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36	POSSIBILITY OF SUCH DAMAGE.
37	-----------------------------------------------------------------------------
38	*/
39
40
41	/* This module contains pcre_exec(), the externally visible function that does
42	pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43	possible. There are also some static supporting functions. */
44
45	#include "pcre_config.h"
46
47	#define NLBLOCK md /* Block containing newline information */
48	#define PSSTART start_subject /* Field containing processed string start */
49	#define PSEND end_subject /* Field containing processed string end */
50
51	#include "pcre_internal.h"
52
53	/* Undefine some potentially clashing cpp symbols */
54
55	#undef min
56	#undef max
57
58	/* Flag bits for the match() function */
59
/* Flag bits passed in the "flags" argument of match(). */

#define match_condassert     0x01  /* Called to check a condition assertion */
#define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */

/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Special internal returns from the match() function. Make them sufficiently
negative to avoid the external error codes. */

#define MATCH_COMMIT       (-999)
#define MATCH_PRUNE        (-998)
#define MATCH_SKIP         (-997)
#define MATCH_THEN         (-996)

/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long. */

#define REC_STACK_SAVE_MAX 30
82
/* Min and max values for the common repeats; for the maxima, 0 => infinity.
NOTE(review): the six entries presumably correspond, in order, to the greedy
and minimizing forms of "*", "+" and "?" - confirm against the code that
indexes these tables. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
87
88
89
90	#ifdef DEBUG
91	/*************************************************
92	* Debugging function to print chars *
93	*************************************************/
94
95	/* Print a sequence of chars in printable format, stopping at the end of the
96	subject if the requested.
97
98	Arguments:
99	p points to characters
100	length number to print
101	is_subject TRUE if printing from within md->start_subject
102	md pointer to matching data block, if is_subject is TRUE
103
104	Returns: nothing
105	*/
106
107	static void
108	pchars(const uschar p, int length, BOOL is_subject, match_data md)
109	{
110	unsigned int c;
111	if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
112	while (length-- > 0)
113	if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
114	}
115	#endif
116
117
118
119	/*************************************************
120	* Match a back-reference *
121	*************************************************/
122
123	/* If a back reference hasn't been set, the length that is passed is greater
124	than the number of characters left in the string, so the match fails.
125
126	Arguments:
127	offset index into the offset vector
128	eptr points into the subject
129	length length to be matched
130	md points to match data block
131	ims the ims flags
132
133	Returns: TRUE if matched
134	*/
135
136	static BOOL
137	match_ref(int offset, register USPTR eptr, int length, match_data *md,
138	unsigned long int ims)
139	{
140	USPTR p = md->start_subject + md->offset_vector[offset];
141
142	#ifdef DEBUG
143	if (eptr >= md->end_subject)
144	printf("matching subject <null>");
145	else
146	{
147	printf("matching subject ");
148	pchars(eptr, length, TRUE, md);
149	}
150	printf(" against backref ");
151	pchars(p, length, FALSE, md);
152	printf("\n");
153	#endif
154
155	/* Always fail if not enough characters left */
156
157	if (length > md->end_subject - eptr) return FALSE;
158
159	/* Separate the caseless case for speed. In UTF-8 mode we can only do this
160	properly if Unicode properties are supported. Otherwise, we can check only
161	ASCII characters. */
162
163	if ((ims & PCRE_CASELESS) != 0)
164	{
165	#ifdef SUPPORT_UTF8
166	#ifdef SUPPORT_UCP
167	if (md->utf8)
168	{
169	USPTR endptr = eptr + length;
170	while (eptr < endptr)
171	{
172	int c, d;
173	GETCHARINC(c, eptr);
174	GETCHARINC(d, p);
175	if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
176	}
177	}
178	else
179	#endif
180	#endif
181
182	/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
183	is no UCP support. */
184
185	while (length-- > 0)
186	{ if (md->lcc[p++] != md->lcc[eptr++]) return FALSE; }
187	}
188
189	/* In the caseful case, we can just compare the bytes, whether or not we
190	are in UTF-8 mode. */
191
192	else
193	{ while (length-- > 0) if (p++ != eptr++) return FALSE; }
194
195	return TRUE;
196	}
197
198
199
200	/***************************************************************************
201	****************************************************************************
202	RECURSION IN THE match() FUNCTION
203
204	The match() function is highly recursive, though not every recursive call
205	increases the recursive depth. Nevertheless, some regular expressions can cause
206	it to recurse to a great depth. I was writing for Unix, so I just let it call
207	itself recursively. This uses the stack for saving everything that has to be
208	saved for a recursive call. On Unix, the stack can be large, and this works
209	fine.
210
211	It turns out that on some non-Unix-like systems there are problems with
212	programs that use a lot of stack. (This despite the fact that every last chip
213	has oodles of memory these days, and techniques for extending the stack have
214	been known for decades.) So....
215
216	There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
217	calls by keeping local variables that need to be preserved in blocks of memory
218	obtained from malloc() instead of on the stack. Macros are used to
219	achieve this so that the actual code doesn't look very different to what it
220	always used to.
221
222	The original heap-recursive code used longjmp(). However, it seems that this
223	can be very slow on some operating systems. Following a suggestion from Stan
224	Switzer, the use of longjmp() has been abolished, at the cost of having to
225	provide a unique number for each call to RMATCH. There is no way of generating
226	a sequence of numbers at compile time in C. I have given them names, to make
227	them stand out more clearly.
228
229	Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
230	FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
231	tests. Furthermore, not using longjmp() means that local dynamic variables
232	don't have indeterminate values; this has meant that the frame size can be
233	reduced because the result can be "passed back" by straight setting of the
234	variable instead of being passed in the frame.
235	****************************************************************************
236	***************************************************************************/
237
238	/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
239	below must be updated in sync. */
240
/* One identifier per RMATCH call site; the value is passed as the "rw"
argument of RMATCH. Numbering starts at 1 and is consecutive. */

enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
       RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
       RM51,  RM52, RM53, RM54 };
247
248	/* These versions of the macros use the stack, as normal. There are debugging
249	versions and production versions. Note that the "rw" argument of RMATCH isn't
250	actually used in this definition. */
251
#ifndef NO_RECURSE
#define REGISTER register

/* Stack-recursive versions: RMATCH is a plain recursive call to match() whose
result is left in rrc, and RRETURN is a plain return. The "rd" argument is
passed through as the md argument; "rw" is not used. The DEBUG variants trace
each call and return with printf(). */

#ifdef DEBUG
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }
#else
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
#define RRETURN(ra) return ra
#endif

#else


/* These versions of the macros manage a private stack on the heap. Note that
the "rd" argument of RMATCH isn't actually used in this definition. It's the md
argument of match(), which never changes. */

#define REGISTER

/* RMATCH obtains a new frame, records in the CURRENT frame where to resume
("rw"), fills the new frame with the "arguments", links it to the current
frame, and jumps to HEAP_RECURSE. The L_##rw label is the resume point. If
the frame cannot be obtained, the current level gives up with
PCRE_ERROR_NOMEMORY via RRETURN instead of writing through a NULL pointer. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* RRETURN releases the current frame and steps back to the previous one. If
there is a previous frame, the result is left in rrc and control jumps to
HEAP_RETURN; otherwise this was the top level (Xprevframe == NULL) and the
value is returned from the function. */

#define RRETURN(ra)\
  {\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }


/* Structure for remembering the local variables in a private frame */

typedef struct heapframe {
  struct heapframe *Xprevframe;   /* Calling frame; NULL at the top level */

  /* Function arguments that may change */

  const uschar *Xeptr;
  const uschar *Xecode;
  const uschar *Xmstart;
  int Xoffset_top;
  unsigned long int Xims;         /* Same type as the ims argument of match() */
  eptrblock *Xeptrb;
  int Xflags;
  unsigned int Xrdepth;

  /* Function local variables */

  const uschar *Xcallpat;
  const uschar *Xcharptr;
  const uschar *Xdata;
  const uschar *Xnext;
  const uschar *Xpp;
  const uschar *Xprev;
  const uschar *Xsaved_eptr;

  recursion_info Xnew_recursive;

  BOOL Xcur_is_word;
  BOOL Xcondition;
  BOOL Xprev_is_word;

  unsigned long int Xoriginal_ims;

#ifdef SUPPORT_UCP
  int Xprop_type;
  int Xprop_value;
  int Xprop_fail_result;
  int Xprop_category;
  int Xprop_chartype;
  int Xprop_script;
  int Xoclength;
  uschar Xocchars[8];
#endif

  int Xctype;
  unsigned int Xfc;
  int Xfi;
  int Xlength;
  int Xmax;
  int Xmin;
  int Xnumber;
  int Xoffset;
  int Xop;
  int Xsave_capture_last;
  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
  int Xstacksave[REC_STACK_SAVE_MAX];

  eptrblock Xnewptrb;

  /* Where to jump back to */

  int Xwhere;

} heapframe;

#endif
383
384
385	/***************************************************************************
386	***************************************************************************/
387
388
389
390	/*************************************************
391	* Match from current position *
392	*************************************************/
393
394	/* This function is called recursively in many circumstances. Whenever it
395	returns a negative (error) response, the outer incarnation must also return the
396	same response.
397
398	Performance note: It might be tempting to extract commonly used fields from the
399	md structure (e.g. utf8, end_subject) into individual variables to improve
400	performance. Tests using gcc on a SPARC disproved this; in the first case, it
401	made performance worse.
402
403	Arguments:
404	eptr pointer to current character in subject
405	ecode pointer to current position in compiled code
406	mstart pointer to the current match start position (can be modified
407	by encountering \K)
408	offset_top current top pointer
409	md pointer to "static" info for the match
410	ims current /i, /m, and /s options
411	eptrb pointer to chain of blocks containing eptr at start of
412	brackets - for testing for empty matches
413	flags can contain
414	match_condassert - this is an assertion condition
415	match_cbegroup - this is the start of an unlimited repeat
416	group that can match an empty string
417	rdepth the recursion depth
418
419	Returns: MATCH_MATCH if matched ) these values are >= 0
420	MATCH_NOMATCH if failed to match )
421	a negative PCRE_ERROR_xxx value if aborted by an error condition
422	(e.g. stopped by repeated call or recursion limit)
423	*/
424
425	static int
426	match(REGISTER USPTR eptr, REGISTER const uschar ecode, const uschar mstart,
427	int offset_top, match_data md, unsigned long int ims, eptrblock eptrb,
428	int flags, unsigned int rdepth)
429	{
430	/* These variables do not need to be preserved over recursion in this function,
431	so they can be ordinary variables in all cases. Mark some of them with
432	"register" because they are used a lot in loops. */
433
434	register int rrc; /* Returns from recursive calls */
435	register int i; /* Used for loops not involving calls to RMATCH() */
436	register unsigned int c; /* Character values not kept over RMATCH() calls */
437	register BOOL utf8; /* Local copy of UTF-8 flag for speed */
438
439	BOOL minimize, possessive; /* Quantifier options */
440
441	/* When recursion is not being used, all "local" variables that have to be
442	preserved over calls to RMATCH() are part of a "frame" which is obtained from
443	heap storage. Set up the top-level frame here; others are obtained from the
444	heap whenever RMATCH() does a "recursion". See the macro definitions above. */
445
446	#ifdef NO_RECURSE
447	heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
448	frame->Xprevframe = NULL; /* Marks the top level */
449
450	/* Copy in the original argument variables */
451
452	frame->Xeptr = eptr;
453	frame->Xecode = ecode;
454	frame->Xmstart = mstart;
455	frame->Xoffset_top = offset_top;
456	frame->Xims = ims;
457	frame->Xeptrb = eptrb;
458	frame->Xflags = flags;
459	frame->Xrdepth = rdepth;
460
461	/* This is where control jumps back to to effect "recursion" */
462
463	HEAP_RECURSE:
464
465	/* Macros make the argument variables come from the current frame */
466
467	#define eptr frame->Xeptr
468	#define ecode frame->Xecode
469	#define mstart frame->Xmstart
470	#define offset_top frame->Xoffset_top
471	#define ims frame->Xims
472	#define eptrb frame->Xeptrb
473	#define flags frame->Xflags
474	#define rdepth frame->Xrdepth
475
476	/* Ditto for the local variables */
477
478	#ifdef SUPPORT_UTF8
479	#define charptr frame->Xcharptr
480	#endif
481	#define callpat frame->Xcallpat
482	#define data frame->Xdata
483	#define next frame->Xnext
484	#define pp frame->Xpp
485	#define prev frame->Xprev
486	#define saved_eptr frame->Xsaved_eptr
487
488	#define new_recursive frame->Xnew_recursive
489
490	#define cur_is_word frame->Xcur_is_word
491	#define condition frame->Xcondition
492	#define prev_is_word frame->Xprev_is_word
493
494	#define original_ims frame->Xoriginal_ims
495
496	#ifdef SUPPORT_UCP
497	#define prop_type frame->Xprop_type
498	#define prop_value frame->Xprop_value
499	#define prop_fail_result frame->Xprop_fail_result
500	#define prop_category frame->Xprop_category
501	#define prop_chartype frame->Xprop_chartype
502	#define prop_script frame->Xprop_script
503	#define oclength frame->Xoclength
504	#define occhars frame->Xocchars
505	#endif
506
507	#define ctype frame->Xctype
508	#define fc frame->Xfc
509	#define fi frame->Xfi
510	#define length frame->Xlength
511	#define max frame->Xmax
512	#define min frame->Xmin
513	#define number frame->Xnumber
514	#define offset frame->Xoffset
515	#define op frame->Xop
516	#define save_capture_last frame->Xsave_capture_last
517	#define save_offset1 frame->Xsave_offset1
518	#define save_offset2 frame->Xsave_offset2
519	#define save_offset3 frame->Xsave_offset3
520	#define stacksave frame->Xstacksave
521
522	#define newptrb frame->Xnewptrb
523
524	/* When recursion is being used, local variables are allocated on the stack and
525	get preserved during recursion in the normal way. In this environment, fi and
526	i, and fc and c, can be the same variables. */
527
528	#else /* NO_RECURSE not defined */
529	#define fi i
530	#define fc c
531
532
533	#ifdef SUPPORT_UTF8 /* Many of these variables are used only */
534	const uschar charptr; / in small blocks of the code. My normal */
535	#endif /* style of coding would have declared */
536	const uschar callpat; / them within each of those blocks. */
537	const uschar data; / However, in order to accommodate the */
538	const uschar next; / version of this code that uses an */
539	USPTR pp; /* external "stack" implemented on the */
540	const uschar prev; / heap, it is easier to declare them all */
541	USPTR saved_eptr; /* here, so the declarations can be cut */
542	/* out in a block. The only declarations */
543	recursion_info new_recursive; /* within blocks below are for variables */
544	/* that do not have to be preserved over */
545	BOOL cur_is_word; /* a recursive call to RMATCH(). */
546	BOOL condition;
547	BOOL prev_is_word;
548
549	unsigned long int original_ims;
550
551	#ifdef SUPPORT_UCP
552	int prop_type;
553	int prop_value;
554	int prop_fail_result;
555	int prop_category;
556	int prop_chartype;
557	int prop_script;
558	int oclength;
559	uschar occhars[8];
560	#endif
561
562	int ctype;
563	int length;
564	int max;
565	int min;
566	int number;
567	int offset;
568	int op;
569	int save_capture_last;
570	int save_offset1, save_offset2, save_offset3;
571	int stacksave[REC_STACK_SAVE_MAX];
572
573	eptrblock newptrb;
574	#endif /* NO_RECURSE */
575
576	/* These statements are here to stop the compiler complaining about unitialized
577	variables. */
578
579	#ifdef SUPPORT_UCP
580	prop_value = 0;
581	prop_fail_result = 0;
582	#endif
583
584
585	/* This label is used for tail recursion, which is used in a few cases even
586	when NO_RECURSE is not defined, in order to reduce the amount of stack that is
587	used. Thanks to Ian Taylor for noticing this possibility and sending the
588	original patch. */
589
590	TAIL_RECURSE:
591
592	/* OK, now we can get on with the real code of the function. Recursive calls
593	are specified by the macro RMATCH and RRETURN is used to return. When
594	NO_RECURSE is not defined, these just turn into a recursive call to match()
595	and a "return", respectively (possibly with some debugging if DEBUG is
596	defined). However, RMATCH isn't like a function call because it's quite a
597	complicated macro. It has to be used in one particular way. This shouldn't,
598	however, impact performance when true recursion is being used. */
599
600	#ifdef SUPPORT_UTF8
601	utf8 = md->utf8; /* Local copy of the flag */
602	#else
603	utf8 = FALSE;
604	#endif
605
606	/* First check that we haven't called match() too many times, or that we
607	haven't exceeded the recursive call limit. */
608
609	if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
610	if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
611
612	original_ims = ims; /* Save for resetting on ')' */
613
614	/* At the start of a group with an unlimited repeat that may match an empty
615	string, the match_cbegroup flag is set. When this is the case, add the current
616	subject pointer to the chain of such remembered pointers, to be checked when we
617	hit the closing ket, in order to break infinite loops that match no characters.
618	When match() is called in other circumstances, don't add to the chain. The
619	match_cbegroup flag must NOT be used with tail recursion, because the memory
620	block that is used is on the stack, so a new one may be required for each
621	match(). */
622
623	if ((flags & match_cbegroup) != 0)
624	{
625	newptrb.epb_saved_eptr = eptr;
626	newptrb.epb_prev = eptrb;
627	eptrb = &newptrb;
628	}
629
630	/* Now start processing the opcodes. */
631
632	for (;;)
633	{
634	minimize = possessive = FALSE;
635	op = *ecode;
636
637	/* For partial matching, remember if we ever hit the end of the subject after
638	matching at least one subject character. */
639
640	if (md->partial &&
641	eptr >= md->end_subject &&
642	eptr > mstart)
643	md->hitend = TRUE;
644
645	switch(op)
646	{
647	case OP_FAIL:
648	RRETURN(MATCH_NOMATCH);
649
650	case OP_PRUNE:
651	RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
652	ims, eptrb, flags, RM51);
653	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
654	RRETURN(MATCH_PRUNE);
655
656	case OP_COMMIT:
657	RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
658	ims, eptrb, flags, RM52);
659	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
660	RRETURN(MATCH_COMMIT);
661
662	case OP_SKIP:
663	RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
664	ims, eptrb, flags, RM53);
665	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
666	md->start_match_ptr = eptr; /* Pass back current position */
667	RRETURN(MATCH_SKIP);
668
669	case OP_THEN:
670	RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
671	ims, eptrb, flags, RM54);
672	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
673	RRETURN(MATCH_THEN);
674
675	/* Handle a capturing bracket. If there is space in the offset vector, save
676	the current subject position in the working slot at the top of the vector.
677	We mustn't change the current values of the data slot, because they may be
678	set from a previous iteration of this group, and be referred to by a
679	reference inside the group.
680
681	If the bracket fails to match, we need to restore this value and also the
682	values of the final offsets, in case they were set by a previous iteration
683	of the same bracket.
684
685	If there isn't enough space in the offset vector, treat this as if it were
686	a non-capturing bracket. Don't worry about setting the flag for the error
687	case here; that is handled in the code for KET. */
688
689	case OP_CBRA:
690	case OP_SCBRA:
691	number = GET2(ecode, 1+LINK_SIZE);
692	offset = number << 1;
693
694	#ifdef DEBUG
695	printf("start bracket %d\n", number);
696	printf("subject=");
697	pchars(eptr, 16, TRUE, md);
698	printf("\n");
699	#endif
700
701	if (offset < md->offset_max)
702	{
703	save_offset1 = md->offset_vector[offset];
704	save_offset2 = md->offset_vector[offset+1];
705	save_offset3 = md->offset_vector[md->offset_end - number];
706	save_capture_last = md->capture_last;
707
708	DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
709	md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
710
711	flags = (op == OP_SCBRA)? match_cbegroup : 0;
712	do
713	{
714	RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
715	ims, eptrb, flags, RM1);
716	if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
717	md->capture_last = save_capture_last;
718	ecode += GET(ecode, 1);
719	}
720	while (*ecode == OP_ALT);
721
722	DPRINTF(("bracket %d failed\n", number));
723
724	md->offset_vector[offset] = save_offset1;
725	md->offset_vector[offset+1] = save_offset2;
726	md->offset_vector[md->offset_end - number] = save_offset3;
727
728	RRETURN(MATCH_NOMATCH);
729	}
730
731	/* FALL THROUGH ... Insufficient room for saving captured contents. Treat
732	as a non-capturing bracket. */
733
734	/* VVVVVVVVVVVVVVVVVVVVVVVVV */
735	/* VVVVVVVVVVVVVVVVVVVVVVVVV */
736
737	DPRINTF(("insufficient capture room: treat as non-capturing\n"));
738
739	/* VVVVVVVVVVVVVVVVVVVVVVVVV */
740	/* VVVVVVVVVVVVVVVVVVVVVVVVV */
741
742	/* Non-capturing bracket. Loop for all the alternatives. When we get to the
743	final alternative within the brackets, we would return the result of a
744	recursive call to match() whatever happened. We can reduce stack usage by
745	turning this into a tail recursion, except in the case when match_cbegroup
746	is set.*/
747
748	case OP_BRA:
749	case OP_SBRA:
750	DPRINTF(("start non-capturing bracket\n"));
751	flags = (op >= OP_SBRA)? match_cbegroup : 0;
752	for (;;)
753	{
754	if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
755	{
756	if (flags == 0) /* Not a possibly empty group */
757	{
758	ecode += _pcre_OP_lengths[*ecode];
759	DPRINTF(("bracket 0 tail recursion\n"));
760	goto TAIL_RECURSE;
761	}
762
763	/* Possibly empty group; can't use tail recursion. */
764
765	RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
766	eptrb, flags, RM48);
767	RRETURN(rrc);
768	}
769
770	/* For non-final alternatives, continue the loop for a NOMATCH result;
771	otherwise return. */
772
773	RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
774	eptrb, flags, RM2);
775	if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
776	ecode += GET(ecode, 1);
777	}
778	/* Control never reaches here. */
779
780	/* Conditional group: compilation checked that there are no more than
781	two branches. If the condition is false, skipping the first branch takes us
782	past the end if there is only one branch, but that's OK because that is
783	exactly what going to the ket would do. As there is only one branch to be
784	obeyed, we can use tail recursion to avoid using another stack frame. */
785
786	case OP_COND:
787	case OP_SCOND:
788	if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
789	{
790	offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
791	condition = md->recursive != NULL &&
792	(offset == RREF_ANY \|\| offset == md->recursive->group_num);
793	ecode += condition? 3 : GET(ecode, 1);
794	}
795
796	else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */
797	{
798	offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
799	condition = offset < offset_top && md->offset_vector[offset] >= 0;
800	ecode += condition? 3 : GET(ecode, 1);
801	}
802
803	else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */
804	{
805	condition = FALSE;
806	ecode += GET(ecode, 1);
807	}
808
809	/* The condition is an assertion. Call match() to evaluate it - setting
810	the final argument match_condassert causes it to stop at the end of an
811	assertion. */
812
813	else
814	{
815	RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
816	match_condassert, RM3);
817	if (rrc == MATCH_MATCH)
818	{
819	condition = TRUE;
820	ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
821	while (*ecode == OP_ALT) ecode += GET(ecode, 1);
822	}
823	else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
824	{
825	RRETURN(rrc); /* Need braces because of following else */
826	}
827	else
828	{
829	condition = FALSE;
830	ecode += GET(ecode, 1);
831	}
832	}
833
834	/* We are now at the branch that is to be obeyed. As there is only one,
835	we can use tail recursion to avoid using another stack frame, except when
836	match_cbegroup is required for an unlimited repeat of a possibly empty
837	group. If the second alternative doesn't exist, we can just plough on. */
838
839	if (condition \|\| *ecode == OP_ALT)
840	{
841	ecode += 1 + LINK_SIZE;
842	if (op == OP_SCOND) /* Possibly empty group */
843	{
844	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
845	RRETURN(rrc);
846	}
847	else /* Group must match something */
848	{
849	flags = 0;
850	goto TAIL_RECURSE;
851	}
852	}
853	else /* Condition false & no 2nd alternative */
854	{
855	ecode += 1 + LINK_SIZE;
856	}
857	break;
858
859
860	/* End of the pattern, either real or forced. If we are in a top-level
861	recursion, we should restore the offsets appropriately and continue from
862	after the call. */
863
864	case OP_ACCEPT:
865	case OP_END:
866	if (md->recursive != NULL && md->recursive->group_num == 0)
867	{
868	recursion_info *rec = md->recursive;
869	DPRINTF(("End of pattern in a (?0) recursion\n"));
870	md->recursive = rec->prevrec;
871	memmove(md->offset_vector, rec->offset_save,
872	rec->saved_max * sizeof(int));
873	mstart = rec->save_start;
874	ims = original_ims;
875	ecode = rec->after_call;
876	break;
877	}
878
879	/* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
880	string - backtracking will then try other alternatives, if any. */
881
882	if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
883	md->end_match_ptr = eptr; /* Record where we ended */
884	md->end_offset_top = offset_top; /* and how many extracts were taken */
885	md->start_match_ptr = mstart; /* and the start (\K can modify) */
886	RRETURN(MATCH_MATCH);
887
888	/* Change option settings */
889
890	case OP_OPT:
891	ims = ecode[1];
892	ecode += 2;
893	DPRINTF(("ims set to %02lx\n", ims));
894	break;
895
896	/* Assertion brackets. Check the alternative branches in turn - the
897	matching won't pass the KET for an assertion. If any one branch matches,
898	the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
899	start of each branch to move the current point backwards, so the code at
900	this level is identical to the lookahead case. */
901
902	case OP_ASSERT:
903	case OP_ASSERTBACK:
904	do
905	{
906	RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
907	RM4);
908	if (rrc == MATCH_MATCH) break;
909	if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
910	ecode += GET(ecode, 1);
911	}
912	while (*ecode == OP_ALT);
913	if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
914
915	/* If checking an assertion for a condition, return MATCH_MATCH. */
916
917	if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
918
919	/* Continue from after the assertion, updating the offsets high water
920	mark, since extracts may have been taken during the assertion. */
921
922	do ecode += GET(ecode,1); while (*ecode == OP_ALT);
923	ecode += 1 + LINK_SIZE;
924	offset_top = md->end_offset_top;
925	continue;
926
927	/* Negative assertion: all branches must fail to match */
928
929	case OP_ASSERT_NOT:
930	case OP_ASSERTBACK_NOT:
931	do
932	{
933	RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
934	RM5);
935	if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
936	if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
937	ecode += GET(ecode,1);
938	}
939	while (*ecode == OP_ALT);
940
941	if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
942
943	ecode += 1 + LINK_SIZE;
944	continue;
945
946	/* Move the subject pointer back. This occurs only at the start of
947	each branch of a lookbehind assertion. If we are too close to the start to
948	move back, this match function fails. When working with UTF-8 we move
949	back a number of characters, not bytes. */
950
951	case OP_REVERSE:
952	#ifdef SUPPORT_UTF8
953	if (utf8)
954	{
955	i = GET(ecode, 1);
956	while (i-- > 0)
957	{
958	eptr--;
959	if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
960	BACKCHAR(eptr);
961	}
962	}
963	else
964	#endif
965
966	/* No UTF-8 support, or not in UTF-8 mode: count is byte count */
967
968	{
969	eptr -= GET(ecode, 1);
970	if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
971	}
972
973	/* Skip to next op code */
974
975	ecode += 1 + LINK_SIZE;
976	break;
977
978	/* The callout item calls an external function, if one is provided, passing
979	details of the match so far. This is mainly for debugging, though the
980	function is able to force a failure. */
981
982	case OP_CALLOUT:
983	if (pcre_callout != NULL)
984	{
985	pcre_callout_block cb;
986	cb.version = 1; /* Version 1 of the callout block */
987	cb.callout_number = ecode[1];
988	cb.offset_vector = md->offset_vector;
989	cb.subject = (PCRE_SPTR)md->start_subject;
990	cb.subject_length = md->end_subject - md->start_subject;
991	cb.start_match = mstart - md->start_subject;
992	cb.current_position = eptr - md->start_subject;
993	cb.pattern_position = GET(ecode, 2);
994	cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
995	cb.capture_top = offset_top/2;
996	cb.capture_last = md->capture_last;
997	cb.callout_data = md->callout_data;
998	if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
999	if (rrc < 0) RRETURN(rrc);
1000	}
1001	ecode += 2 + 2*LINK_SIZE;
1002	break;
1003
1004	/* Recursion either matches the current regex, or some subexpression. The
1005	offset data is the offset to the starting bracket from the start of the
1006	whole pattern. (This is so that it works from duplicated subpatterns.)
1007
1008	If there are any capturing brackets started but not finished, we have to
1009	save their starting points and reinstate them after the recursion. However,
1010	we don't know how many such there are (offset_top records the completed
1011	total) so we just have to save all the potential data. There may be up to
1012	65535 such values, which is too large to put on the stack, but using malloc
1013	for small numbers seems expensive. As a compromise, the stack is used when
1014	there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1015	is used. If the malloc fails, the code below abandons the match and returns
1016	PCRE_ERROR_NOMEMORY; it does not fall back to saving only the top
1017	REC_STACK_SAVE_MAX values on the stack.
1018
1019	There are also other values that have to be saved. We use a chained
1020	sequence of blocks that actually live on the stack. Thanks to Robin Houston
1021	for the original version of this logic. */
1022
1023	case OP_RECURSE:
1024	{
1025	callpat = md->start_code + GET(ecode, 1);
1026	new_recursive.group_num = (callpat == md->start_code)? 0 :
1027	GET2(callpat, 1 + LINK_SIZE);
1028
1029	/* Add to "recursing stack" */
1030
1031	new_recursive.prevrec = md->recursive;
1032	md->recursive = &new_recursive;
1033
1034	/* Find where to continue from afterwards */
1035
1036	ecode += 1 + LINK_SIZE;
1037	new_recursive.after_call = ecode;
1038
1039	/* Now save the offset data. */
1040
1041	new_recursive.saved_max = md->offset_end;
1042	if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1043	new_recursive.offset_save = stacksave;
1044	else
1045	{
1046	new_recursive.offset_save =
1047	(int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1048	if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1049	}
1050
1051	memcpy(new_recursive.offset_save, md->offset_vector,
1052	new_recursive.saved_max * sizeof(int));
1053	new_recursive.save_start = mstart;
1054	mstart = eptr;
1055
1056	/* OK, now we can do the recursion. For each top-level alternative we
1057	restore the offset and recursion data. */
1058
1059	DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1060	flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1061	do
1062	{
1063	RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1064	md, ims, eptrb, flags, RM6);
1065	if (rrc == MATCH_MATCH)
1066	{
1067	DPRINTF(("Recursion matched\n"));
1068	md->recursive = new_recursive.prevrec;
1069	if (new_recursive.offset_save != stacksave)
1070	(pcre_free)(new_recursive.offset_save);
1071	RRETURN(MATCH_MATCH);
1072	}
1073	else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1074	{
1075	DPRINTF(("Recursion gave error %d\n", rrc));
1076	RRETURN(rrc);
1077	}
1078
1079	md->recursive = &new_recursive;
1080	memcpy(md->offset_vector, new_recursive.offset_save,
1081	new_recursive.saved_max * sizeof(int));
1082	callpat += GET(callpat, 1);
1083	}
1084	while (*callpat == OP_ALT);
1085
1086	DPRINTF(("Recursion didn't match\n"));
1087	md->recursive = new_recursive.prevrec;
1088	if (new_recursive.offset_save != stacksave)
1089	(pcre_free)(new_recursive.offset_save);
1090	RRETURN(MATCH_NOMATCH);
1091	}
1092	/* Control never reaches here */
1093
1094	/* "Once" brackets are like assertion brackets except that after a match,
1095	the point in the subject string is not moved back. Thus there can never be
1096	a move back into the brackets. Friedl calls these "atomic" subpatterns.
1097	Check the alternative branches in turn - the matching won't pass the KET
1098	for this kind of subpattern. If any one branch matches, we carry on as at
1099	the end of a normal bracket, leaving the subject pointer. */
1100
1101	case OP_ONCE:
1102	prev = ecode;
1103	saved_eptr = eptr;
1104
1105	do
1106	{
1107	RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1108	if (rrc == MATCH_MATCH) break;
1109	if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1110	ecode += GET(ecode,1);
1111	}
1112	while (*ecode == OP_ALT);
1113
1114	/* If hit the end of the group (which could be repeated), fail */
1115
1116	if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1117
1118	/* Continue as from after the assertion, updating the offsets high water
1119	mark, since extracts may have been taken. */
1120
1121	do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1122
1123	offset_top = md->end_offset_top;
1124	eptr = md->end_match_ptr;
1125
1126	/* For a non-repeating ket, just continue at this level. This also
1127	happens for a repeating ket if no characters were matched in the group.
1128	This is the forcible breaking of infinite loops as implemented in Perl
1129	5.005. If there is an options reset, it will get obeyed in the normal
1130	course of events. */
1131
1132	if (*ecode == OP_KET || eptr == saved_eptr)
1133	{
1134	ecode += 1+LINK_SIZE;
1135	break;
1136	}
1137
1138	/* The repeating kets try the rest of the pattern or restart from the
1139	preceding bracket, in the appropriate order. The second "call" of match()
1140	uses tail recursion, to avoid using another stack frame. We need to reset
1141	any options that changed within the bracket before re-running it, so
1142	check the next opcode. */
1143
1144	if (ecode[1+LINK_SIZE] == OP_OPT)
1145	{
1146	ims = (ims & ~PCRE_IMS) | ecode[4];
1147	DPRINTF(("ims set to %02lx at group repeat\n", ims));
1148	}
1149
1150	if (*ecode == OP_KETRMIN)
1151	{
1152	RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1153	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1154	ecode = prev;
1155	flags = 0;
1156	goto TAIL_RECURSE;
1157	}
1158	else /* OP_KETRMAX */
1159	{
1160	RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1161	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1162	ecode += 1 + LINK_SIZE;
1163	flags = 0;
1164	goto TAIL_RECURSE;
1165	}
1166	/* Control never gets here */
1167
1168	/* An alternation is the end of a branch; scan along to find the end of the
1169	bracketed group and go to there. */
1170
1171	case OP_ALT:
1172	do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1173	break;
1174
1175	/* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1176	indicating that it may occur zero times. It may repeat infinitely, or not
1177	at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1178	with fixed upper repeat limits are compiled as a number of copies, with the
1179	optional ones preceded by BRAZERO or BRAMINZERO. */
1180
1181	case OP_BRAZERO:
1182	{
1183	next = ecode+1;
1184	RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1185	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1186	do next += GET(next,1); while (*next == OP_ALT);
1187	ecode = next + 1 + LINK_SIZE;
1188	}
1189	break;
1190
1191	case OP_BRAMINZERO:
1192	{
1193	next = ecode+1;
1194	do next += GET(next, 1); while (*next == OP_ALT);
1195	RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1196	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1197	ecode++;
1198	}
1199	break;
1200
1201	case OP_SKIPZERO:
1202	{
1203	next = ecode+1;
1204	do next += GET(next,1); while (*next == OP_ALT);
1205	ecode = next + 1 + LINK_SIZE;
1206	}
1207	break;
1208
1209	/* End of a group, repeated or non-repeating. */
1210
1211	case OP_KET:
1212	case OP_KETRMIN:
1213	case OP_KETRMAX:
1214	prev = ecode - GET(ecode, 1);
1215
1216	/* If this was a group that remembered the subject start, in order to break
1217	infinite repeats of empty string matches, retrieve the subject start from
1218	the chain. Otherwise, set it NULL. */
1219
1220	if (*prev >= OP_SBRA)
1221	{
1222	saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1223	eptrb = eptrb->epb_prev; /* Backup to previous group */
1224	}
1225	else saved_eptr = NULL;
1226
1227	/* If we are at the end of an assertion group, stop matching and return
1228	MATCH_MATCH, but record the current high water mark for use by positive
1229	assertions. Do this also for the "once" (atomic) groups. */
1230
1231	if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1232	*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1233	*prev == OP_ONCE)
1234	{
1235	md->end_match_ptr = eptr; /* For ONCE */
1236	md->end_offset_top = offset_top;
1237	RRETURN(MATCH_MATCH);
1238	}
1239
1240	/* For capturing groups we have to check the group number back at the start
1241	and if necessary complete handling an extraction by setting the offsets and
1242	bumping the high water mark. Note that whole-pattern recursion is coded as
1243	a recurse into group 0, so it won't be picked up here. Instead, we catch it
1244	when the OP_END is reached. Other recursion is handled here. */
1245
1246	if (*prev == OP_CBRA || *prev == OP_SCBRA)
1247	{
1248	number = GET2(prev, 1+LINK_SIZE);
1249	offset = number << 1;
1250
1251	#ifdef DEBUG
1252	printf("end bracket %d", number);
1253	printf("\n");
1254	#endif
1255
1256	md->capture_last = number;
1257	if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1258	{
1259	md->offset_vector[offset] =
1260	md->offset_vector[md->offset_end - number];
1261	md->offset_vector[offset+1] = eptr - md->start_subject;
1262	if (offset_top <= offset) offset_top = offset + 2;
1263	}
1264
1265	/* Handle a recursively called group. Restore the offsets
1266	appropriately and continue from after the call. */
1267
1268	if (md->recursive != NULL && md->recursive->group_num == number)
1269	{
1270	recursion_info *rec = md->recursive;
1271	DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1272	md->recursive = rec->prevrec;
1273	mstart = rec->save_start;
1274	memcpy(md->offset_vector, rec->offset_save,
1275	rec->saved_max * sizeof(int));
1276	ecode = rec->after_call;
1277	ims = original_ims;
1278	break;
1279	}
1280	}
1281
1282	/* For both capturing and non-capturing groups, reset the value of the ims
1283	flags, in case they got changed during the group. */
1284
1285	ims = original_ims;
1286	DPRINTF(("ims reset to %02lx\n", ims));
1287
1288	/* For a non-repeating ket, just continue at this level. This also
1289	happens for a repeating ket if no characters were matched in the group.
1290	This is the forcible breaking of infinite loops as implemented in Perl
1291	5.005. If there is an options reset, it will get obeyed in the normal
1292	course of events. */
1293
1294	if (*ecode == OP_KET || eptr == saved_eptr)
1295	{
1296	ecode += 1 + LINK_SIZE;
1297	break;
1298	}
1299
1300	/* The repeating kets try the rest of the pattern or restart from the
1301	preceding bracket, in the appropriate order. In the second case, we can use
1302	tail recursion to avoid using another stack frame, unless we have an
1303	unlimited repeat of a group that can match an empty string. */
1304
1305	flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1306
1307	if (*ecode == OP_KETRMIN)
1308	{
1309	RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1310	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1311	if (flags != 0) /* Could match an empty string */
1312	{
1313	RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1314	RRETURN(rrc);
1315	}
1316	ecode = prev;
1317	goto TAIL_RECURSE;
1318	}
1319	else /* OP_KETRMAX */
1320	{
1321	RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1322	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1323	ecode += 1 + LINK_SIZE;
1324	flags = 0;
1325	goto TAIL_RECURSE;
1326	}
1327	/* Control never gets here */
1328
1329	/* Start of subject unless notbol, or after internal newline if multiline */
1330
1331	case OP_CIRC:
1332	if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1333	if ((ims & PCRE_MULTILINE) != 0)
1334	{
1335	if (eptr != md->start_subject &&
1336	(eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1337	RRETURN(MATCH_NOMATCH);
1338	ecode++;
1339	break;
1340	}
1341	/* ... else fall through */
1342
1343	/* Start of subject assertion */
1344
1345	case OP_SOD:
1346	if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1347	ecode++;
1348	break;
1349
1350	/* Start of match assertion */
1351
1352	case OP_SOM:
1353	if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1354	ecode++;
1355	break;
1356
1357	/* Reset the start of match point */
1358
1359	case OP_SET_SOM:
1360	mstart = eptr;
1361	ecode++;
1362	break;
1363
1364	/* Assert before internal newline if multiline, or before a terminating
1365	newline unless endonly is set, else end of subject unless noteol is set. */
1366
1367	case OP_DOLL:
1368	if ((ims & PCRE_MULTILINE) != 0)
1369	{
1370	if (eptr < md->end_subject)
1371	{ if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1372	else
1373	{ if (md->noteol) RRETURN(MATCH_NOMATCH); }
1374	ecode++;
1375	break;
1376	}
1377	else
1378	{
1379	if (md->noteol) RRETURN(MATCH_NOMATCH);
1380	if (!md->endonly)
1381	{
1382	if (eptr != md->end_subject &&
1383	(!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1384	RRETURN(MATCH_NOMATCH);
1385	ecode++;
1386	break;
1387	}
1388	}
1389	/* ... else fall through for endonly */
1390
1391	/* End of subject assertion (\z) */
1392
1393	case OP_EOD:
1394	if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1395	ecode++;
1396	break;
1397
1398	/* End of subject or ending \n assertion (\Z) */
1399
1400	case OP_EODN:
1401	if (eptr != md->end_subject &&
1402	(!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1403	RRETURN(MATCH_NOMATCH);
1404	ecode++;
1405	break;
1406
1407	/* Word boundary assertions */
1408
1409	case OP_NOT_WORD_BOUNDARY:
1410	case OP_WORD_BOUNDARY:
1411	{
1412
1413	/* Find out if the previous and current characters are "word" characters.
1414	It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1415	be "non-word" characters. */
1416
1417	#ifdef SUPPORT_UTF8
1418	if (utf8)
1419	{
1420	if (eptr == md->start_subject) prev_is_word = FALSE; else
1421	{
1422	const uschar *lastptr = eptr - 1;
1423	while((*lastptr & 0xc0) == 0x80) lastptr--;
1424	GETCHAR(c, lastptr);
1425	prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1426	}
1427	if (eptr >= md->end_subject) cur_is_word = FALSE; else
1428	{
1429	GETCHAR(c, eptr);
1430	cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1431	}
1432	}
1433	else
1434	#endif
1435
1436	/* More streamlined when not in UTF-8 mode */
1437
1438	{
1439	prev_is_word = (eptr != md->start_subject) &&
1440	((md->ctypes[eptr[-1]] & ctype_word) != 0);
1441	cur_is_word = (eptr < md->end_subject) &&
1442	((md->ctypes[*eptr] & ctype_word) != 0);
1443	}
1444
1445	/* Now see if the situation is what we want */
1446
1447	if ((*ecode++ == OP_WORD_BOUNDARY)?
1448	cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1449	RRETURN(MATCH_NOMATCH);
1450	}
1451	break;
1452
1453	/* Match a single character type; inline for speed */
1454
1455	case OP_ANY:
1456	if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1457	/* Fall through */
1458
1459	case OP_ALLANY:
1460	if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1461	if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1462	ecode++;
1463	break;
1464
1465	/* Match a single byte, even in UTF-8 mode. This opcode really does match
1466	any byte, even newline, independent of the setting of PCRE_DOTALL. */
1467
1468	case OP_ANYBYTE:
1469	if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1470	ecode++;
1471	break;
1472
1473	case OP_NOT_DIGIT:
1474	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1475	GETCHARINCTEST(c, eptr);
1476	if (
1477	#ifdef SUPPORT_UTF8
1478	c < 256 &&
1479	#endif
1480	(md->ctypes[c] & ctype_digit) != 0
1481	)
1482	RRETURN(MATCH_NOMATCH);
1483	ecode++;
1484	break;
1485
1486	case OP_DIGIT:
1487	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1488	GETCHARINCTEST(c, eptr);
1489	if (
1490	#ifdef SUPPORT_UTF8
1491	c >= 256 ||
1492	#endif
1493	(md->ctypes[c] & ctype_digit) == 0
1494	)
1495	RRETURN(MATCH_NOMATCH);
1496	ecode++;
1497	break;
1498
1499	case OP_NOT_WHITESPACE:
1500	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1501	GETCHARINCTEST(c, eptr);
1502	if (
1503	#ifdef SUPPORT_UTF8
1504	c < 256 &&
1505	#endif
1506	(md->ctypes[c] & ctype_space) != 0
1507	)
1508	RRETURN(MATCH_NOMATCH);
1509	ecode++;
1510	break;
1511
1512	case OP_WHITESPACE:
1513	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1514	GETCHARINCTEST(c, eptr);
1515	if (
1516	#ifdef SUPPORT_UTF8
1517	c >= 256 ||
1518	#endif
1519	(md->ctypes[c] & ctype_space) == 0
1520	)
1521	RRETURN(MATCH_NOMATCH);
1522	ecode++;
1523	break;
1524
1525	case OP_NOT_WORDCHAR:
1526	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1527	GETCHARINCTEST(c, eptr);
1528	if (
1529	#ifdef SUPPORT_UTF8
1530	c < 256 &&
1531	#endif
1532	(md->ctypes[c] & ctype_word) != 0
1533	)
1534	RRETURN(MATCH_NOMATCH);
1535	ecode++;
1536	break;
1537
1538	case OP_WORDCHAR:
1539	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1540	GETCHARINCTEST(c, eptr);
1541	if (
1542	#ifdef SUPPORT_UTF8
1543	c >= 256 ||
1544	#endif
1545	(md->ctypes[c] & ctype_word) == 0
1546	)
1547	RRETURN(MATCH_NOMATCH);
1548	ecode++;
1549	break;
1550
1551	case OP_ANYNL:
1552	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1553	GETCHARINCTEST(c, eptr);
1554	switch(c)
1555	{
1556	default: RRETURN(MATCH_NOMATCH);
1557	case 0x000d:
1558	if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1559	break;
1560
1561	case 0x000a:
1562	break;
1563
1564	case 0x000b:
1565	case 0x000c:
1566	case 0x0085:
1567	case 0x2028:
1568	case 0x2029:
1569	if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1570	break;
1571	}
1572	ecode++;
1573	break;
1574
1575	case OP_NOT_HSPACE:
1576	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1577	GETCHARINCTEST(c, eptr);
1578	switch(c)
1579	{
1580	default: break;
1581	case 0x09: /* HT */
1582	case 0x20: /* SPACE */
1583	case 0xa0: /* NBSP */
1584	case 0x1680: /* OGHAM SPACE MARK */
1585	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1586	case 0x2000: /* EN QUAD */
1587	case 0x2001: /* EM QUAD */
1588	case 0x2002: /* EN SPACE */
1589	case 0x2003: /* EM SPACE */
1590	case 0x2004: /* THREE-PER-EM SPACE */
1591	case 0x2005: /* FOUR-PER-EM SPACE */
1592	case 0x2006: /* SIX-PER-EM SPACE */
1593	case 0x2007: /* FIGURE SPACE */
1594	case 0x2008: /* PUNCTUATION SPACE */
1595	case 0x2009: /* THIN SPACE */
1596	case 0x200A: /* HAIR SPACE */
1597	case 0x202f: /* NARROW NO-BREAK SPACE */
1598	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1599	case 0x3000: /* IDEOGRAPHIC SPACE */
1600	RRETURN(MATCH_NOMATCH);
1601	}
1602	ecode++;
1603	break;
1604
1605	case OP_HSPACE:
1606	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1607	GETCHARINCTEST(c, eptr);
1608	switch(c)
1609	{
1610	default: RRETURN(MATCH_NOMATCH);
1611	case 0x09: /* HT */
1612	case 0x20: /* SPACE */
1613	case 0xa0: /* NBSP */
1614	case 0x1680: /* OGHAM SPACE MARK */
1615	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1616	case 0x2000: /* EN QUAD */
1617	case 0x2001: /* EM QUAD */
1618	case 0x2002: /* EN SPACE */
1619	case 0x2003: /* EM SPACE */
1620	case 0x2004: /* THREE-PER-EM SPACE */
1621	case 0x2005: /* FOUR-PER-EM SPACE */
1622	case 0x2006: /* SIX-PER-EM SPACE */
1623	case 0x2007: /* FIGURE SPACE */
1624	case 0x2008: /* PUNCTUATION SPACE */
1625	case 0x2009: /* THIN SPACE */
1626	case 0x200A: /* HAIR SPACE */
1627	case 0x202f: /* NARROW NO-BREAK SPACE */
1628	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1629	case 0x3000: /* IDEOGRAPHIC SPACE */
1630	break;
1631	}
1632	ecode++;
1633	break;
1634
1635	case OP_NOT_VSPACE:
1636	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1637	GETCHARINCTEST(c, eptr);
1638	switch(c)
1639	{
1640	default: break;
1641	case 0x0a: /* LF */
1642	case 0x0b: /* VT */
1643	case 0x0c: /* FF */
1644	case 0x0d: /* CR */
1645	case 0x85: /* NEL */
1646	case 0x2028: /* LINE SEPARATOR */
1647	case 0x2029: /* PARAGRAPH SEPARATOR */
1648	RRETURN(MATCH_NOMATCH);
1649	}
1650	ecode++;
1651	break;
1652
1653	case OP_VSPACE:
1654	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1655	GETCHARINCTEST(c, eptr);
1656	switch(c)
1657	{
1658	default: RRETURN(MATCH_NOMATCH);
1659	case 0x0a: /* LF */
1660	case 0x0b: /* VT */
1661	case 0x0c: /* FF */
1662	case 0x0d: /* CR */
1663	case 0x85: /* NEL */
1664	case 0x2028: /* LINE SEPARATOR */
1665	case 0x2029: /* PARAGRAPH SEPARATOR */
1666	break;
1667	}
1668	ecode++;
1669	break;
1670
1671	#ifdef SUPPORT_UCP
1672	/* Check the next character by Unicode property. We will get here only
1673	if the support is in the binary; otherwise a compile-time error occurs. */
1674
1675	case OP_PROP:
1676	case OP_NOTPROP:
1677	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1678	GETCHARINCTEST(c, eptr);
1679	{
1680	const ucd_record * prop = GET_UCD(c);
1681
1682	switch(ecode[1])
1683	{
1684	case PT_ANY:
1685	if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1686	break;
1687
1688	case PT_LAMP:
1689	if ((prop->chartype == ucp_Lu ||
1690	prop->chartype == ucp_Ll ||
1691	prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1692	RRETURN(MATCH_NOMATCH);
1693	break;
1694
1695	case PT_GC:
1696	if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1697	RRETURN(MATCH_NOMATCH);
1698	break;
1699
1700	case PT_PC:
1701	if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1702	RRETURN(MATCH_NOMATCH);
1703	break;
1704
1705	case PT_SC:
1706	if ((ecode[2] != prop->script) == (op == OP_PROP))
1707	RRETURN(MATCH_NOMATCH);
1708	break;
1709
1710	default:
1711	RRETURN(PCRE_ERROR_INTERNAL);
1712	}
1713
1714	ecode += 3;
1715	}
1716	break;
1717
1718	/* Match an extended Unicode sequence. We will get here only if the support
1719	is in the binary; otherwise a compile-time error occurs. */
1720
1721	case OP_EXTUNI:
1722	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1723	GETCHARINCTEST(c, eptr);
1724	{
1725	int category = UCD_CATEGORY(c);
1726	if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1727	while (eptr < md->end_subject)
1728	{
1729	int len = 1;
1730	if (!utf8) c = *eptr; else
1731	{
1732	GETCHARLEN(c, eptr, len);
1733	}
1734	category = UCD_CATEGORY(c);
1735	if (category != ucp_M) break;
1736	eptr += len;
1737	}
1738	}
1739	ecode++;
1740	break;
1741	#endif
1742
1743
1744	/* Match a back reference, possibly repeatedly. Look past the end of the
1745	item to see if there is repeat information following. The code is similar
1746	to that for character classes, but repeated for efficiency. Then obey
1747	similar code to character type repeats - written out again for speed.
1748	However, if the referenced string is the empty string, always treat
1749	it as matched, any number of times (otherwise there could be infinite
1750	loops). */
1751
1752	case OP_REF:
1753	{
1754	offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1755	ecode += 3;
1756
1757	/* If the reference is unset, there are two possibilities:
1758
1759	(a) In the default, Perl-compatible state, set the length to be longer
1760	than the amount of subject left; this ensures that every attempt at a
1761	match fails. We can't just fail here, because of the possibility of
1762	quantifiers with zero minima.
1763
1764	(b) If the JavaScript compatibility flag is set, set the length to zero
1765	so that the back reference matches an empty string.
1766
1767	Otherwise, set the length to the length of what was matched by the
1768	referenced subpattern. */
1769
1770	if (offset >= offset_top || md->offset_vector[offset] < 0)
1771	length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1772	else
1773	length = md->offset_vector[offset+1] - md->offset_vector[offset];
1774
1775	/* Set up for repetition, or handle the non-repeated case */
1776
1777	switch (*ecode)
1778	{
1779	case OP_CRSTAR:
1780	case OP_CRMINSTAR:
1781	case OP_CRPLUS:
1782	case OP_CRMINPLUS:
1783	case OP_CRQUERY:
1784	case OP_CRMINQUERY:
1785	c = *ecode++ - OP_CRSTAR;
1786	minimize = (c & 1) != 0;
1787	min = rep_min[c]; /* Pick up values from tables; */
1788	max = rep_max[c]; /* zero for max => infinity */
1789	if (max == 0) max = INT_MAX;
1790	break;
1791
1792	case OP_CRRANGE:
1793	case OP_CRMINRANGE:
1794	minimize = (*ecode == OP_CRMINRANGE);
1795	min = GET2(ecode, 1);
1796	max = GET2(ecode, 3);
1797	if (max == 0) max = INT_MAX;
1798	ecode += 5;
1799	break;
1800
1801	default: /* No repeat follows */
1802	if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1803	eptr += length;
1804	continue; /* With the main loop */
1805	}
1806
1807	/* If the length of the reference is zero, just continue with the
1808	main loop. */
1809
1810	if (length == 0) continue;
1811
1812	/* First, ensure the minimum number of matches are present. We get back
1813	the length of the reference string explicitly rather than passing the
1814	address of eptr, so that eptr can be a register variable. */
1815
1816	for (i = 1; i <= min; i++)
1817	{
1818	if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1819	eptr += length;
1820	}
1821
1822	/* If min = max, continue at the same level without recursion.
1823	They are not both allowed to be zero. */
1824
1825	if (min == max) continue;
1826
1827	/* If minimizing, keep trying and advancing the pointer */
1828
1829	if (minimize)
1830	{
1831	for (fi = min;; fi++)
1832	{
1833	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1834	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1835	if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1836	RRETURN(MATCH_NOMATCH);
1837	eptr += length;
1838	}
1839	/* Control never gets here */
1840	}
1841
1842	/* If maximizing, find the longest string and work backwards */
1843
1844	else
1845	{
1846	pp = eptr;
1847	for (i = min; i < max; i++)
1848	{
1849	if (!match_ref(offset, eptr, length, md, ims)) break;
1850	eptr += length;
1851	}
1852	while (eptr >= pp)
1853	{
1854	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1855	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1856	eptr -= length;
1857	}
1858	RRETURN(MATCH_NOMATCH);
1859	}
1860	}
1861	/* Control never gets here */
1862
1863
1864
1865	/* Match a bit-mapped character class, possibly repeatedly. This op code is
1866	used when all the characters in the class have values in the range 0-255,
1867	and either the matching is caseful, or the characters are in the range
1868	0-127 when UTF-8 processing is enabled. The only difference between
1869	OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1870	encountered.
1871
1872	First, look past the end of the item to see if there is repeat information
1873	following. Then obey similar code to character type repeats - written out
1874	again for speed. */
1875
1876	case OP_NCLASS:
1877	case OP_CLASS:
1878	{
1879	data = ecode + 1; /* Save for matching */
1880	ecode += 33; /* Advance past the item */
1881
1882	switch (*ecode)
1883	{
1884	case OP_CRSTAR:
1885	case OP_CRMINSTAR:
1886	case OP_CRPLUS:
1887	case OP_CRMINPLUS:
1888	case OP_CRQUERY:
1889	case OP_CRMINQUERY:
1890	c = *ecode++ - OP_CRSTAR;
1891	minimize = (c & 1) != 0;
1892	min = rep_min[c]; /* Pick up values from tables; */
1893	max = rep_max[c]; /* zero for max => infinity */
1894	if (max == 0) max = INT_MAX;
1895	break;
1896
1897	case OP_CRRANGE:
1898	case OP_CRMINRANGE:
1899	minimize = (*ecode == OP_CRMINRANGE);
1900	min = GET2(ecode, 1);
1901	max = GET2(ecode, 3);
1902	if (max == 0) max = INT_MAX;
1903	ecode += 5;
1904	break;
1905
1906	default: /* No repeat follows */
1907	min = max = 1;
1908	break;
1909	}
1910
1911	/* First, ensure the minimum number of matches are present. */
1912
1913	#ifdef SUPPORT_UTF8
1914	/* UTF-8 mode */
1915	if (utf8)
1916	{
1917	for (i = 1; i <= min; i++)
1918	{
1919	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1920	GETCHARINC(c, eptr);
1921	if (c > 255)
1922	{
1923	if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1924	}
1925	else
1926	{
1927	if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1928	}
1929	}
1930	}
1931	else
1932	#endif
1933	/* Not UTF-8 mode */
1934	{
1935	for (i = 1; i <= min; i++)
1936	{
1937	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1938	c = *eptr++;
1939	if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1940	}
1941	}
1942
1943	/* If max == min we can continue with the main loop without the
1944	need to recurse. */
1945
1946	if (min == max) continue;
1947
1948	/* If minimizing, keep testing the rest of the expression and advancing
1949	the pointer while it matches the class. */
1950
1951	if (minimize)
1952	{
1953	#ifdef SUPPORT_UTF8
1954	/* UTF-8 mode */
1955	if (utf8)
1956	{
1957	for (fi = min;; fi++)
1958	{
1959	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1960	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1961	if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1962	GETCHARINC(c, eptr);
1963	if (c > 255)
1964	{
1965	if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1966	}
1967	else
1968	{
1969	if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1970	}
1971	}
1972	}
1973	else
1974	#endif
1975	/* Not UTF-8 mode */
1976	{
1977	for (fi = min;; fi++)
1978	{
1979	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1980	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1981	if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1982	c = *eptr++;
1983	if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1984	}
1985	}
1986	/* Control never gets here */
1987	}
1988
1989	/* If maximizing, find the longest possible run, then work backwards. */
1990
1991	else
1992	{
1993	pp = eptr;
1994
1995	#ifdef SUPPORT_UTF8
1996	/* UTF-8 mode */
1997	if (utf8)
1998	{
1999	for (i = min; i < max; i++)
2000	{
2001	int len = 1;
2002	if (eptr >= md->end_subject) break;
2003	GETCHARLEN(c, eptr, len);
2004	if (c > 255)
2005	{
2006	if (op == OP_CLASS) break;
2007	}
2008	else
2009	{
2010	if ((data[c/8] & (1 << (c&7))) == 0) break;
2011	}
2012	eptr += len;
2013	}
2014	for (;;)
2015	{
2016	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2017	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2018	if (eptr-- == pp) break; /* Stop if tried at original pos */
2019	BACKCHAR(eptr);
2020	}
2021	}
2022	else
2023	#endif
2024	/* Not UTF-8 mode */
2025	{
2026	for (i = min; i < max; i++)
2027	{
2028	if (eptr >= md->end_subject) break;
2029	c = *eptr;
2030	if ((data[c/8] & (1 << (c&7))) == 0) break;
2031	eptr++;
2032	}
2033	while (eptr >= pp)
2034	{
2035	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2036	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2037	eptr--;
2038	}
2039	}
2040
2041	RRETURN(MATCH_NOMATCH);
2042	}
2043	}
2044	/* Control never gets here */
2045
2046
2047	/* Match an extended character class. This opcode is encountered only
2048	in UTF-8 mode, because that's the only time it is compiled. */
2049
2050	#ifdef SUPPORT_UTF8
2051	case OP_XCLASS:
2052	{
2053	data = ecode + 1 + LINK_SIZE; /* Save for matching */
2054	ecode += GET(ecode, 1); /* Advance past the item */
2055
2056	switch (*ecode)
2057	{
2058	case OP_CRSTAR:
2059	case OP_CRMINSTAR:
2060	case OP_CRPLUS:
2061	case OP_CRMINPLUS:
2062	case OP_CRQUERY:
2063	case OP_CRMINQUERY:
2064	c = *ecode++ - OP_CRSTAR;
2065	minimize = (c & 1) != 0;
2066	min = rep_min[c]; /* Pick up values from tables; */
2067	max = rep_max[c]; /* zero for max => infinity */
2068	if (max == 0) max = INT_MAX;
2069	break;
2070
2071	case OP_CRRANGE:
2072	case OP_CRMINRANGE:
2073	minimize = (*ecode == OP_CRMINRANGE);
2074	min = GET2(ecode, 1);
2075	max = GET2(ecode, 3);
2076	if (max == 0) max = INT_MAX;
2077	ecode += 5;
2078	break;
2079
2080	default: /* No repeat follows */
2081	min = max = 1;
2082	break;
2083	}
2084
2085	/* First, ensure the minimum number of matches are present. */
2086
2087	for (i = 1; i <= min; i++)
2088	{
2089	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2090	GETCHARINC(c, eptr);
2091	if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2092	}
2093
2094	/* If max == min we can continue with the main loop without the
2095	need to recurse. */
2096
2097	if (min == max) continue;
2098
2099	/* If minimizing, keep testing the rest of the expression and advancing
2100	the pointer while it matches the class. */
2101
2102	if (minimize)
2103	{
2104	for (fi = min;; fi++)
2105	{
2106	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2107	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2108	if (fi >= max \|\| eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2109	GETCHARINC(c, eptr);
2110	if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2111	}
2112	/* Control never gets here */
2113	}
2114
2115	/* If maximizing, find the longest possible run, then work backwards. */
2116
2117	else
2118	{
2119	pp = eptr;
2120	for (i = min; i < max; i++)
2121	{
2122	int len = 1;
2123	if (eptr >= md->end_subject) break;
2124	GETCHARLEN(c, eptr, len);
2125	if (!_pcre_xclass(c, data)) break;
2126	eptr += len;
2127	}
2128	for(;;)
2129	{
2130	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2131	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2132	if (eptr-- == pp) break; /* Stop if tried at original pos */
2133	if (utf8) BACKCHAR(eptr);
2134	}
2135	RRETURN(MATCH_NOMATCH);
2136	}
2137
2138	/* Control never gets here */
2139	}
2140	#endif /* End of XCLASS */
2141
2142	/* Match a single character, casefully */
2143
2144	case OP_CHAR:
2145	#ifdef SUPPORT_UTF8
2146	if (utf8)
2147	{
2148	length = 1;
2149	ecode++;
2150	GETCHARLEN(fc, ecode, length);
2151	if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2152	while (length-- > 0) if (ecode++ != eptr++) RRETURN(MATCH_NOMATCH);
2153	}
2154	else
2155	#endif
2156
2157	/* Non-UTF-8 mode */
2158	{
2159	if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2160	if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2161	ecode += 2;
2162	}
2163	break;
2164
2165	/* Match a single character, caselessly */
2166
2167	case OP_CHARNC:
2168	#ifdef SUPPORT_UTF8
2169	if (utf8)
2170	{
2171	length = 1;
2172	ecode++;
2173	GETCHARLEN(fc, ecode, length);
2174
2175	if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2176
2177	/* If the pattern character's value is < 128, we have only one byte, and
2178	can use the fast lookup table. */
2179
2180	if (fc < 128)
2181	{
2182	if (md->lcc[ecode++] != md->lcc[eptr++]) RRETURN(MATCH_NOMATCH);
2183	}
2184
2185	/* Otherwise we must pick up the subject character */
2186
2187	else
2188	{
2189	unsigned int dc;
2190	GETCHARINC(dc, eptr);
2191	ecode += length;
2192
2193	/* If we have Unicode property support, we can use it to test the other
2194	case of the character, if there is one. */
2195
2196	if (fc != dc)
2197	{
2198	#ifdef SUPPORT_UCP
2199	if (dc != UCD_OTHERCASE(fc))
2200	#endif
2201	RRETURN(MATCH_NOMATCH);
2202	}
2203	}
2204	}
2205	else
2206	#endif /* SUPPORT_UTF8 */
2207
2208	/* Non-UTF-8 mode */
2209	{
2210	if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2211	if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2212	ecode += 2;
2213	}
2214	break;
2215
2216	/* Match a single character repeatedly. */
2217
2218	case OP_EXACT:
2219	min = max = GET2(ecode, 1);
2220	ecode += 3;
2221	goto REPEATCHAR;
2222
2223	case OP_POSUPTO:
2224	possessive = TRUE;
2225	/* Fall through */
2226
2227	case OP_UPTO:
2228	case OP_MINUPTO:
2229	min = 0;
2230	max = GET2(ecode, 1);
2231	minimize = *ecode == OP_MINUPTO;
2232	ecode += 3;
2233	goto REPEATCHAR;
2234
2235	case OP_POSSTAR:
2236	possessive = TRUE;
2237	min = 0;
2238	max = INT_MAX;
2239	ecode++;
2240	goto REPEATCHAR;
2241
2242	case OP_POSPLUS:
2243	possessive = TRUE;
2244	min = 1;
2245	max = INT_MAX;
2246	ecode++;
2247	goto REPEATCHAR;
2248
2249	case OP_POSQUERY:
2250	possessive = TRUE;
2251	min = 0;
2252	max = 1;
2253	ecode++;
2254	goto REPEATCHAR;
2255
2256	case OP_STAR:
2257	case OP_MINSTAR:
2258	case OP_PLUS:
2259	case OP_MINPLUS:
2260	case OP_QUERY:
2261	case OP_MINQUERY:
2262	c = *ecode++ - OP_STAR;
2263	minimize = (c & 1) != 0;
2264	min = rep_min[c]; /* Pick up values from tables; */
2265	max = rep_max[c]; /* zero for max => infinity */
2266	if (max == 0) max = INT_MAX;
2267
2268	/* Common code for all repeated single-character matches. We can give
2269	up quickly if there are fewer than the minimum number of characters left in
2270	the subject. */
2271
2272	REPEATCHAR:
2273	#ifdef SUPPORT_UTF8
2274	if (utf8)
2275	{
2276	length = 1;
2277	charptr = ecode;
2278	GETCHARLEN(fc, ecode, length);
2279	if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2280	ecode += length;
2281
2282	/* Handle multibyte character matching specially here. There is
2283	support for caseless matching if UCP support is present. */
2284
2285	if (length > 1)
2286	{
2287	#ifdef SUPPORT_UCP
2288	unsigned int othercase;
2289	if ((ims & PCRE_CASELESS) != 0 &&
2290	(othercase = UCD_OTHERCASE(fc)) != fc)
2291	oclength = _pcre_ord2utf8(othercase, occhars);
2292	else oclength = 0;
2293	#endif /* SUPPORT_UCP */
2294
2295	for (i = 1; i <= min; i++)
2296	{
2297	if (memcmp(eptr, charptr, length) == 0) eptr += length;
2298	#ifdef SUPPORT_UCP
2299	/* Need braces because of following else */
2300	else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2301	else
2302	{
2303	if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2304	eptr += oclength;
2305	}
2306	#else /* without SUPPORT_UCP */
2307	else { RRETURN(MATCH_NOMATCH); }
2308	#endif /* SUPPORT_UCP */
2309	}
2310
2311	if (min == max) continue;
2312
2313	if (minimize)
2314	{
2315	for (fi = min;; fi++)
2316	{
2317	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2318	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2319	if (fi >= max \|\| eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2320	if (memcmp(eptr, charptr, length) == 0) eptr += length;
2321	#ifdef SUPPORT_UCP
2322	/* Need braces because of following else */
2323	else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2324	else
2325	{
2326	if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2327	eptr += oclength;
2328	}
2329	#else /* without SUPPORT_UCP */
2330	else { RRETURN (MATCH_NOMATCH); }
2331	#endif /* SUPPORT_UCP */
2332	}
2333	/* Control never gets here */
2334	}
2335
2336	else /* Maximize */
2337	{
2338	pp = eptr;
2339	for (i = min; i < max; i++)
2340	{
2341	if (eptr > md->end_subject - length) break;
2342	if (memcmp(eptr, charptr, length) == 0) eptr += length;
2343	#ifdef SUPPORT_UCP
2344	else if (oclength == 0) break;
2345	else
2346	{
2347	if (memcmp(eptr, occhars, oclength) != 0) break;
2348	eptr += oclength;
2349	}
2350	#else /* without SUPPORT_UCP */
2351	else break;
2352	#endif /* SUPPORT_UCP */
2353	}
2354
2355	if (possessive) continue;
2356	for(;;)
2357	{
2358	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2359	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2360	if (eptr == pp) RRETURN(MATCH_NOMATCH);
2361	#ifdef SUPPORT_UCP
2362	eptr--;
2363	BACKCHAR(eptr);
2364	#else /* without SUPPORT_UCP */
2365	eptr -= length;
2366	#endif /* SUPPORT_UCP */
2367	}
2368	}
2369	/* Control never gets here */
2370	}
2371
2372	/* If the length of a UTF-8 character is 1, we fall through here, and
2373	obey the code as for non-UTF-8 characters below, though in this case the
2374	value of fc will always be < 128. */
2375	}
2376	else
2377	#endif /* SUPPORT_UTF8 */
2378
2379	/* When not in UTF-8 mode, load a single-byte character. */
2380	{
2381	if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2382	fc = *ecode++;
2383	}
2384
2385	/* The value of fc at this point is always less than 256, though we may or
2386	may not be in UTF-8 mode. The code is duplicated for the caseless and
2387	caseful cases, for speed, since matching characters is likely to be quite
2388	common. First, ensure the minimum number of matches are present. If min =
2389	max, continue at the same level without recursing. Otherwise, if
2390	minimizing, keep trying the rest of the expression and advancing one
2391	matching character if failing, up to the maximum. Alternatively, if
2392	maximizing, find the maximum number of characters and work backwards. */
2393
2394	DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2395	max, eptr));
2396
2397	if ((ims & PCRE_CASELESS) != 0)
2398	{
2399	fc = md->lcc[fc];
2400	for (i = 1; i <= min; i++)
2401	if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2402	if (min == max) continue;
2403	if (minimize)
2404	{
2405	for (fi = min;; fi++)
2406	{
2407	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2408	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2409	if (fi >= max \|\| eptr >= md->end_subject \|\|
2410	fc != md->lcc[*eptr++])
2411	RRETURN(MATCH_NOMATCH);
2412	}
2413	/* Control never gets here */
2414	}
2415	else /* Maximize */
2416	{
2417	pp = eptr;
2418	for (i = min; i < max; i++)
2419	{
2420	if (eptr >= md->end_subject \|\| fc != md->lcc[*eptr]) break;
2421	eptr++;
2422	}
2423	if (possessive) continue;
2424	while (eptr >= pp)
2425	{
2426	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2427	eptr--;
2428	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2429	}
2430	RRETURN(MATCH_NOMATCH);
2431	}
2432	/* Control never gets here */
2433	}
2434
2435	/* Caseful comparisons (includes all multi-byte characters) */
2436
2437	else
2438	{
2439	for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2440	if (min == max) continue;
2441	if (minimize)
2442	{
2443	for (fi = min;; fi++)
2444	{
2445	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2446	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2447	if (fi >= max \|\| eptr >= md->end_subject \|\| fc != *eptr++)
2448	RRETURN(MATCH_NOMATCH);
2449	}
2450	/* Control never gets here */
2451	}
2452	else /* Maximize */
2453	{
2454	pp = eptr;
2455	for (i = min; i < max; i++)
2456	{
2457	if (eptr >= md->end_subject \|\| fc != *eptr) break;
2458	eptr++;
2459	}
2460	if (possessive) continue;
2461	while (eptr >= pp)
2462	{
2463	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2464	eptr--;
2465	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2466	}
2467	RRETURN(MATCH_NOMATCH);
2468	}
2469	}
2470	/* Control never gets here */
2471
2472	/* Match a negated single one-byte character. The character we are
2473	checking can be multibyte. */
2474
2475	case OP_NOT:
2476	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2477	ecode++;
2478	GETCHARINCTEST(c, eptr);
2479	if ((ims & PCRE_CASELESS) != 0)
2480	{
2481	#ifdef SUPPORT_UTF8
2482	if (c < 256)
2483	#endif
2484	c = md->lcc[c];
2485	if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2486	}
2487	else
2488	{
2489	if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2490	}
2491	break;
2492
2493	/* Match a negated single one-byte character repeatedly. This is almost a
2494	repeat of the code for a repeated single character, but I haven't found a
2495	nice way of commoning these up that doesn't require a test of the
2496	positive/negative option for each character match. Maybe that wouldn't add
2497	very much to the time taken, but character matching is what this is all
2498	about... */
2499
2500	case OP_NOTEXACT:
2501	min = max = GET2(ecode, 1);
2502	ecode += 3;
2503	goto REPEATNOTCHAR;
2504
2505	case OP_NOTUPTO:
2506	case OP_NOTMINUPTO:
2507	min = 0;
2508	max = GET2(ecode, 1);
2509	minimize = *ecode == OP_NOTMINUPTO;
2510	ecode += 3;
2511	goto REPEATNOTCHAR;
2512
2513	case OP_NOTPOSSTAR:
2514	possessive = TRUE;
2515	min = 0;
2516	max = INT_MAX;
2517	ecode++;
2518	goto REPEATNOTCHAR;
2519
2520	case OP_NOTPOSPLUS:
2521	possessive = TRUE;
2522	min = 1;
2523	max = INT_MAX;
2524	ecode++;
2525	goto REPEATNOTCHAR;
2526
2527	case OP_NOTPOSQUERY:
2528	possessive = TRUE;
2529	min = 0;
2530	max = 1;
2531	ecode++;
2532	goto REPEATNOTCHAR;
2533
2534	case OP_NOTPOSUPTO:
2535	possessive = TRUE;
2536	min = 0;
2537	max = GET2(ecode, 1);
2538	ecode += 3;
2539	goto REPEATNOTCHAR;
2540
2541	case OP_NOTSTAR:
2542	case OP_NOTMINSTAR:
2543	case OP_NOTPLUS:
2544	case OP_NOTMINPLUS:
2545	case OP_NOTQUERY:
2546	case OP_NOTMINQUERY:
2547	c = *ecode++ - OP_NOTSTAR;
2548	minimize = (c & 1) != 0;
2549	min = rep_min[c]; /* Pick up values from tables; */
2550	max = rep_max[c]; /* zero for max => infinity */
2551	if (max == 0) max = INT_MAX;
2552
2553	/* Common code for all repeated single-byte matches. We can give up quickly
2554	if there are fewer than the minimum number of bytes left in the
2555	subject. */
2556
2557	REPEATNOTCHAR:
2558	if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2559	fc = *ecode++;
2560
2561	/* The code is duplicated for the caseless and caseful cases, for speed,
2562	since matching characters is likely to be quite common. First, ensure the
2563	minimum number of matches are present. If min = max, continue at the same
2564	level without recursing. Otherwise, if minimizing, keep trying the rest of
2565	the expression and advancing one matching character if failing, up to the
2566	maximum. Alternatively, if maximizing, find the maximum number of
2567	characters and work backwards. */
2568
2569	DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2570	max, eptr));
2571
2572	if ((ims & PCRE_CASELESS) != 0)
2573	{
2574	fc = md->lcc[fc];
2575
2576	#ifdef SUPPORT_UTF8
2577	/* UTF-8 mode */
2578	if (utf8)
2579	{
2580	register unsigned int d;
2581	for (i = 1; i <= min; i++)
2582	{
2583	GETCHARINC(d, eptr);
2584	if (d < 256) d = md->lcc[d];
2585	if (fc == d) RRETURN(MATCH_NOMATCH);
2586	}
2587	}
2588	else
2589	#endif
2590
2591	/* Not UTF-8 mode */
2592	{
2593	for (i = 1; i <= min; i++)
2594	if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2595	}
2596
2597	if (min == max) continue;
2598
2599	if (minimize)
2600	{
2601	#ifdef SUPPORT_UTF8
2602	/* UTF-8 mode */
2603	if (utf8)
2604	{
2605	register unsigned int d;
2606	for (fi = min;; fi++)
2607	{
2608	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2609	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2610	if (fi >= max \|\| eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2611	GETCHARINC(d, eptr);
2612	if (d < 256) d = md->lcc[d];
2613	if (fc == d) RRETURN(MATCH_NOMATCH);
2614
2615	}
2616	}
2617	else
2618	#endif
2619	/* Not UTF-8 mode */
2620	{
2621	for (fi = min;; fi++)
2622	{
2623	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2624	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2625	if (fi >= max \|\| eptr >= md->end_subject \|\| fc == md->lcc[*eptr++])
2626	RRETURN(MATCH_NOMATCH);
2627	}
2628	}
2629	/* Control never gets here */
2630	}
2631
2632	/* Maximize case */
2633
2634	else
2635	{
2636	pp = eptr;
2637
2638	#ifdef SUPPORT_UTF8
2639	/* UTF-8 mode */
2640	if (utf8)
2641	{
2642	register unsigned int d;
2643	for (i = min; i < max; i++)
2644	{
2645	int len = 1;
2646	if (eptr >= md->end_subject) break;
2647	GETCHARLEN(d, eptr, len);
2648	if (d < 256) d = md->lcc[d];
2649	if (fc == d) break;
2650	eptr += len;
2651	}
2652	if (possessive) continue;
2653	for(;;)
2654	{
2655	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2656	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2657	if (eptr-- == pp) break; /* Stop if tried at original pos */
2658	BACKCHAR(eptr);
2659	}
2660	}
2661	else
2662	#endif
2663	/* Not UTF-8 mode */
2664	{
2665	for (i = min; i < max; i++)
2666	{
2667	if (eptr >= md->end_subject \|\| fc == md->lcc[*eptr]) break;
2668	eptr++;
2669	}
2670	if (possessive) continue;
2671	while (eptr >= pp)
2672	{
2673	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2674	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2675	eptr--;
2676	}
2677	}
2678
2679	RRETURN(MATCH_NOMATCH);
2680	}
2681	/* Control never gets here */
2682	}
2683
2684	/* Caseful comparisons */
2685
2686	else
2687	{
2688	#ifdef SUPPORT_UTF8
2689	/* UTF-8 mode */
2690	if (utf8)
2691	{
2692	register unsigned int d;
2693	for (i = 1; i <= min; i++)
2694	{
2695	GETCHARINC(d, eptr);
2696	if (fc == d) RRETURN(MATCH_NOMATCH);
2697	}
2698	}
2699	else
2700	#endif
2701	/* Not UTF-8 mode */
2702	{
2703	for (i = 1; i <= min; i++)
2704	if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2705	}
2706
2707	if (min == max) continue;
2708
2709	if (minimize)
2710	{
2711	#ifdef SUPPORT_UTF8
2712	/* UTF-8 mode */
2713	if (utf8)
2714	{
2715	register unsigned int d;
2716	for (fi = min;; fi++)
2717	{
2718	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2719	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2720	if (fi >= max \|\| eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2721	GETCHARINC(d, eptr);
2722	if (fc == d) RRETURN(MATCH_NOMATCH);
2723	}
2724	}
2725	else
2726	#endif
2727	/* Not UTF-8 mode */
2728	{
2729	for (fi = min;; fi++)
2730	{
2731	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2732	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2733	if (fi >= max \|\| eptr >= md->end_subject \|\| fc == *eptr++)
2734	RRETURN(MATCH_NOMATCH);
2735	}
2736	}
2737	/* Control never gets here */
2738	}
2739
2740	/* Maximize case */
2741
2742	else
2743	{
2744	pp = eptr;
2745
2746	#ifdef SUPPORT_UTF8
2747	/* UTF-8 mode */
2748	if (utf8)
2749	{
2750	register unsigned int d;
2751	for (i = min; i < max; i++)
2752	{
2753	int len = 1;
2754	if (eptr >= md->end_subject) break;
2755	GETCHARLEN(d, eptr, len);
2756	if (fc == d) break;
2757	eptr += len;
2758	}
2759	if (possessive) continue;
2760	for(;;)
2761	{
2762	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2763	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2764	if (eptr-- == pp) break; /* Stop if tried at original pos */
2765	BACKCHAR(eptr);
2766	}
2767	}
2768	else
2769	#endif
2770	/* Not UTF-8 mode */
2771	{
2772	for (i = min; i < max; i++)
2773	{
2774	if (eptr >= md->end_subject \|\| fc == *eptr) break;
2775	eptr++;
2776	}
2777	if (possessive) continue;
2778	while (eptr >= pp)
2779	{
2780	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2781	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2782	eptr--;
2783	}
2784	}
2785
2786	RRETURN(MATCH_NOMATCH);
2787	}
2788	}
2789	/* Control never gets here */
2790
2791	/* Match a single character type repeatedly; several different opcodes
2792	share code. This is very similar to the code for single characters, but we
2793	repeat it in the interests of efficiency. */
2794
2795	case OP_TYPEEXACT:
2796	min = max = GET2(ecode, 1);
2797	minimize = TRUE;
2798	ecode += 3;
2799	goto REPEATTYPE;
2800
2801	case OP_TYPEUPTO:
2802	case OP_TYPEMINUPTO:
2803	min = 0;
2804	max = GET2(ecode, 1);
2805	minimize = *ecode == OP_TYPEMINUPTO;
2806	ecode += 3;
2807	goto REPEATTYPE;
2808
2809	case OP_TYPEPOSSTAR:
2810	possessive = TRUE;
2811	min = 0;
2812	max = INT_MAX;
2813	ecode++;
2814	goto REPEATTYPE;
2815
2816	case OP_TYPEPOSPLUS:
2817	possessive = TRUE;
2818	min = 1;
2819	max = INT_MAX;
2820	ecode++;
2821	goto REPEATTYPE;
2822
2823	case OP_TYPEPOSQUERY:
2824	possessive = TRUE;
2825	min = 0;
2826	max = 1;
2827	ecode++;
2828	goto REPEATTYPE;
2829
2830	case OP_TYPEPOSUPTO:
2831	possessive = TRUE;
2832	min = 0;
2833	max = GET2(ecode, 1);
2834	ecode += 3;
2835	goto REPEATTYPE;
2836
2837	case OP_TYPESTAR:
2838	case OP_TYPEMINSTAR:
2839	case OP_TYPEPLUS:
2840	case OP_TYPEMINPLUS:
2841	case OP_TYPEQUERY:
2842	case OP_TYPEMINQUERY:
2843	c = *ecode++ - OP_TYPESTAR;
2844	minimize = (c & 1) != 0;
2845	min = rep_min[c]; /* Pick up values from tables; */
2846	max = rep_max[c]; /* zero for max => infinity */
2847	if (max == 0) max = INT_MAX;
2848
2849	/* Common code for all repeated single character type matches. Note that
2850	in UTF-8 mode, '.' matches a character of any length, but for the other
2851	character types, the valid characters are all one-byte long. */
2852
2853	REPEATTYPE:
2854	ctype = *ecode++; /* Code for the character type */
2855
2856	#ifdef SUPPORT_UCP
2857	if (ctype == OP_PROP \|\| ctype == OP_NOTPROP)
2858	{
2859	prop_fail_result = ctype == OP_NOTPROP;
2860	prop_type = *ecode++;
2861	prop_value = *ecode++;
2862	}
2863	else prop_type = -1;
2864	#endif
2865
2866	/* First, ensure the minimum number of matches are present. Use inline
2867	code for maximizing the speed, and do the type test once at the start
2868	(i.e. keep it out of the loop). Also we can test that there are at least
2869	the minimum number of bytes before we start. This isn't as effective in
2870	UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2871	is tidier. Also separate the UCP code, which can be the same for both UTF-8
2872	and single-bytes. */
2873
2874	if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2875	if (min > 0)
2876	{
2877	#ifdef SUPPORT_UCP
2878	if (prop_type >= 0)
2879	{
2880	switch(prop_type)
2881	{
2882	case PT_ANY:
2883	if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2884	for (i = 1; i <= min; i++)
2885	{
2886	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2887	GETCHARINCTEST(c, eptr);
2888	}
2889	break;
2890
2891	case PT_LAMP:
2892	for (i = 1; i <= min; i++)
2893	{
2894	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2895	GETCHARINCTEST(c, eptr);
2896	prop_chartype = UCD_CHARTYPE(c);
2897	if ((prop_chartype == ucp_Lu \|\|
2898	prop_chartype == ucp_Ll \|\|
2899	prop_chartype == ucp_Lt) == prop_fail_result)
2900	RRETURN(MATCH_NOMATCH);
2901	}
2902	break;
2903
2904	case PT_GC:
2905	for (i = 1; i <= min; i++)
2906	{
2907	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2908	GETCHARINCTEST(c, eptr);
2909	prop_category = UCD_CATEGORY(c);
2910	if ((prop_category == prop_value) == prop_fail_result)
2911	RRETURN(MATCH_NOMATCH);
2912	}
2913	break;
2914
2915	case PT_PC:
2916	for (i = 1; i <= min; i++)
2917	{
2918	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2919	GETCHARINCTEST(c, eptr);
2920	prop_chartype = UCD_CHARTYPE(c);
2921	if ((prop_chartype == prop_value) == prop_fail_result)
2922	RRETURN(MATCH_NOMATCH);
2923	}
2924	break;
2925
2926	case PT_SC:
2927	for (i = 1; i <= min; i++)
2928	{
2929	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2930	GETCHARINCTEST(c, eptr);
2931	prop_script = UCD_SCRIPT(c);
2932	if ((prop_script == prop_value) == prop_fail_result)
2933	RRETURN(MATCH_NOMATCH);
2934	}
2935	break;
2936
2937	default:
2938	RRETURN(PCRE_ERROR_INTERNAL);
2939	}
2940	}
2941
2942	/* Match extended Unicode sequences. We will get here only if the
2943	support is in the binary; otherwise a compile-time error occurs. */
2944
2945	else if (ctype == OP_EXTUNI)
2946	{
2947	for (i = 1; i <= min; i++)
2948	{
2949	GETCHARINCTEST(c, eptr);
2950	prop_category = UCD_CATEGORY(c);
2951	if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2952	while (eptr < md->end_subject)
2953	{
2954	int len = 1;
2955	if (!utf8) c = *eptr; else
2956	{
2957	GETCHARLEN(c, eptr, len);
2958	}
2959	prop_category = UCD_CATEGORY(c);
2960	if (prop_category != ucp_M) break;
2961	eptr += len;
2962	}
2963	}
2964	}
2965
2966	else
2967	#endif /* SUPPORT_UCP */
2968
2969	/* Handle all other cases when the coding is UTF-8 */
2970
2971	#ifdef SUPPORT_UTF8
2972	if (utf8) switch(ctype)
2973	{
2974	case OP_ANY:
2975	for (i = 1; i <= min; i++)
2976	{
2977	if (eptr >= md->end_subject \|\| IS_NEWLINE(eptr))
2978	RRETURN(MATCH_NOMATCH);
2979	eptr++;
2980	while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2981	}
2982	break;
2983
2984	case OP_ALLANY:
2985	for (i = 1; i <= min; i++)
2986	{
2987	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2988	eptr++;
2989	while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2990	}
2991	break;
2992
2993	case OP_ANYBYTE:
2994	eptr += min;
2995	break;
2996
2997	case OP_ANYNL:
2998	for (i = 1; i <= min; i++)
2999	{
3000	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3001	GETCHARINC(c, eptr);
3002	switch(c)
3003	{
3004	default: RRETURN(MATCH_NOMATCH);
3005	case 0x000d:
3006	if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3007	break;
3008
3009	case 0x000a:
3010	break;
3011
3012	case 0x000b:
3013	case 0x000c:
3014	case 0x0085:
3015	case 0x2028:
3016	case 0x2029:
3017	if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3018	break;
3019	}
3020	}
3021	break;
3022
3023	case OP_NOT_HSPACE:
3024	for (i = 1; i <= min; i++)
3025	{
3026	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3027	GETCHARINC(c, eptr);
3028	switch(c)
3029	{
3030	default: break;
3031	case 0x09: /* HT */
3032	case 0x20: /* SPACE */
3033	case 0xa0: /* NBSP */
3034	case 0x1680: /* OGHAM SPACE MARK */
3035	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3036	case 0x2000: /* EN QUAD */
3037	case 0x2001: /* EM QUAD */
3038	case 0x2002: /* EN SPACE */
3039	case 0x2003: /* EM SPACE */
3040	case 0x2004: /* THREE-PER-EM SPACE */
3041	case 0x2005: /* FOUR-PER-EM SPACE */
3042	case 0x2006: /* SIX-PER-EM SPACE */
3043	case 0x2007: /* FIGURE SPACE */
3044	case 0x2008: /* PUNCTUATION SPACE */
3045	case 0x2009: /* THIN SPACE */
3046	case 0x200A: /* HAIR SPACE */
3047	case 0x202f: /* NARROW NO-BREAK SPACE */
3048	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3049	case 0x3000: /* IDEOGRAPHIC SPACE */
3050	RRETURN(MATCH_NOMATCH);
3051	}
3052	}
3053	break;
3054
3055	case OP_HSPACE:
3056	for (i = 1; i <= min; i++)
3057	{
3058	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3059	GETCHARINC(c, eptr);
3060	switch(c)
3061	{
3062	default: RRETURN(MATCH_NOMATCH);
3063	case 0x09: /* HT */
3064	case 0x20: /* SPACE */
3065	case 0xa0: /* NBSP */
3066	case 0x1680: /* OGHAM SPACE MARK */
3067	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3068	case 0x2000: /* EN QUAD */
3069	case 0x2001: /* EM QUAD */
3070	case 0x2002: /* EN SPACE */
3071	case 0x2003: /* EM SPACE */
3072	case 0x2004: /* THREE-PER-EM SPACE */
3073	case 0x2005: /* FOUR-PER-EM SPACE */
3074	case 0x2006: /* SIX-PER-EM SPACE */
3075	case 0x2007: /* FIGURE SPACE */
3076	case 0x2008: /* PUNCTUATION SPACE */
3077	case 0x2009: /* THIN SPACE */
3078	case 0x200A: /* HAIR SPACE */
3079	case 0x202f: /* NARROW NO-BREAK SPACE */
3080	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3081	case 0x3000: /* IDEOGRAPHIC SPACE */
3082	break;
3083	}
3084	}
3085	break;
3086
3087	case OP_NOT_VSPACE:
3088	for (i = 1; i <= min; i++)
3089	{
3090	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3091	GETCHARINC(c, eptr);
3092	switch(c)
3093	{
3094	default: break;
3095	case 0x0a: /* LF */
3096	case 0x0b: /* VT */
3097	case 0x0c: /* FF */
3098	case 0x0d: /* CR */
3099	case 0x85: /* NEL */
3100	case 0x2028: /* LINE SEPARATOR */
3101	case 0x2029: /* PARAGRAPH SEPARATOR */
3102	RRETURN(MATCH_NOMATCH);
3103	}
3104	}
3105	break;
3106
3107	case OP_VSPACE:
3108	for (i = 1; i <= min; i++)
3109	{
3110	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3111	GETCHARINC(c, eptr);
3112	switch(c)
3113	{
3114	default: RRETURN(MATCH_NOMATCH);
3115	case 0x0a: /* LF */
3116	case 0x0b: /* VT */
3117	case 0x0c: /* FF */
3118	case 0x0d: /* CR */
3119	case 0x85: /* NEL */
3120	case 0x2028: /* LINE SEPARATOR */
3121	case 0x2029: /* PARAGRAPH SEPARATOR */
3122	break;
3123	}
3124	}
3125	break;
3126
3127	case OP_NOT_DIGIT:
3128	for (i = 1; i <= min; i++)
3129	{
3130	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3131	GETCHARINC(c, eptr);
3132	if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3133	RRETURN(MATCH_NOMATCH);
3134	}
3135	break;
3136
3137	case OP_DIGIT:
3138	for (i = 1; i <= min; i++)
3139	{
3140	if (eptr >= md->end_subject \|\|
3141	eptr >= 128 \|\| (md->ctypes[eptr++] & ctype_digit) == 0)
3142	RRETURN(MATCH_NOMATCH);
3143	/* No need to skip more bytes - we know it's a 1-byte character */
3144	}
3145	break;
3146
3147	case OP_NOT_WHITESPACE:
3148	for (i = 1; i <= min; i++)
3149	{
3150	if (eptr >= md->end_subject \|\|
3151	(eptr < 128 && (md->ctypes[eptr] & ctype_space) != 0))
3152	RRETURN(MATCH_NOMATCH);
3153	while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3154	}
3155	break;
3156
3157	case OP_WHITESPACE:
3158	for (i = 1; i <= min; i++)
3159	{
3160	if (eptr >= md->end_subject \|\|
3161	eptr >= 128 \|\| (md->ctypes[eptr++] & ctype_space) == 0)
3162	RRETURN(MATCH_NOMATCH);
3163	/* No need to skip more bytes - we know it's a 1-byte character */
3164	}
3165	break;
3166
3167	case OP_NOT_WORDCHAR:
3168	for (i = 1; i <= min; i++)
3169	{
3170	if (eptr >= md->end_subject \|\|
3171	(eptr < 128 && (md->ctypes[eptr] & ctype_word) != 0))
3172	RRETURN(MATCH_NOMATCH);
3173	while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3174	}
3175	break;
3176
3177	case OP_WORDCHAR:
3178	for (i = 1; i <= min; i++)
3179	{
3180	if (eptr >= md->end_subject \|\|
3181	eptr >= 128 \|\| (md->ctypes[eptr++] & ctype_word) == 0)
3182	RRETURN(MATCH_NOMATCH);
3183	/* No need to skip more bytes - we know it's a 1-byte character */
3184	}
3185	break;
3186
3187	default:
3188	RRETURN(PCRE_ERROR_INTERNAL);
3189	} /* End switch(ctype) */
3190
3191	else
3192	#endif /* SUPPORT_UTF8 */
3193
3194	/* Code for the non-UTF-8 case for minimum matching of operators other
3195	than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3196	number of bytes present, as this was tested above. */
3197
3198	switch(ctype)
3199	{
3200	case OP_ANY:
3201	for (i = 1; i <= min; i++)
3202	{
3203	if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3204	eptr++;
3205	}
3206	break;
3207
3208	case OP_ALLANY:
3209	eptr += min;
3210	break;
3211
3212	case OP_ANYBYTE:
3213	eptr += min;
3214	break;
3215
3216	/* Because of the CRLF case, we can't assume the minimum number of
3217	bytes are present in this case. */
3218
3219	case OP_ANYNL:
3220	for (i = 1; i <= min; i++)
3221	{
3222	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3223	switch(*eptr++)
3224	{
3225	default: RRETURN(MATCH_NOMATCH);
3226	case 0x000d:
3227	if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3228	break;
3229	case 0x000a:
3230	break;
3231
3232	case 0x000b:
3233	case 0x000c:
3234	case 0x0085:
3235	if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3236	break;
3237	}
3238	}
3239	break;
3240
3241	case OP_NOT_HSPACE:
3242	for (i = 1; i <= min; i++)
3243	{
3244	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3245	switch(*eptr++)
3246	{
3247	default: break;
3248	case 0x09: /* HT */
3249	case 0x20: /* SPACE */
3250	case 0xa0: /* NBSP */
3251	RRETURN(MATCH_NOMATCH);
3252	}
3253	}
3254	break;
3255
3256	case OP_HSPACE:
3257	for (i = 1; i <= min; i++)
3258	{
3259	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3260	switch(*eptr++)
3261	{
3262	default: RRETURN(MATCH_NOMATCH);
3263	case 0x09: /* HT */
3264	case 0x20: /* SPACE */
3265	case 0xa0: /* NBSP */
3266	break;
3267	}
3268	}
3269	break;
3270
3271	case OP_NOT_VSPACE:
3272	for (i = 1; i <= min; i++)
3273	{
3274	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3275	switch(*eptr++)
3276	{
3277	default: break;
3278	case 0x0a: /* LF */
3279	case 0x0b: /* VT */
3280	case 0x0c: /* FF */
3281	case 0x0d: /* CR */
3282	case 0x85: /* NEL */
3283	RRETURN(MATCH_NOMATCH);
3284	}
3285	}
3286	break;
3287
3288	case OP_VSPACE:
3289	for (i = 1; i <= min; i++)
3290	{
3291	if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3292	switch(*eptr++)
3293	{
3294	default: RRETURN(MATCH_NOMATCH);
3295	case 0x0a: /* LF */
3296	case 0x0b: /* VT */
3297	case 0x0c: /* FF */
3298	case 0x0d: /* CR */
3299	case 0x85: /* NEL */
3300	break;
3301	}
3302	}
3303	break;
3304
3305	case OP_NOT_DIGIT:
3306	for (i = 1; i <= min; i++)
3307	if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3308	break;
3309
3310	case OP_DIGIT:
3311	for (i = 1; i <= min; i++)
3312	if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3313	break;
3314
3315	case OP_NOT_WHITESPACE:
3316	for (i = 1; i <= min; i++)
3317	if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3318	break;
3319
3320	case OP_WHITESPACE:
3321	for (i = 1; i <= min; i++)
3322	if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3323	break;
3324
3325	case OP_NOT_WORDCHAR:
3326	for (i = 1; i <= min; i++)
3327	if ((md->ctypes[*eptr++] & ctype_word) != 0)
3328	RRETURN(MATCH_NOMATCH);
3329	break;
3330
3331	case OP_WORDCHAR:
3332	for (i = 1; i <= min; i++)
3333	if ((md->ctypes[*eptr++] & ctype_word) == 0)
3334	RRETURN(MATCH_NOMATCH);
3335	break;
3336
3337	default:
3338	RRETURN(PCRE_ERROR_INTERNAL);
3339	}
3340	}
3341
3342	/* If min = max, continue at the same level without recursing */
3343
3344	if (min == max) continue;
3345
3346	/* If minimizing, we have to test the rest of the pattern before each
3347	subsequent match. Again, separate the UTF-8 case for speed, and also
3348	separate the UCP cases. */
3349
3350	if (minimize)
3351	{
3352	#ifdef SUPPORT_UCP
3353	if (prop_type >= 0)
3354	{
3355	switch(prop_type)
3356	{
3357	case PT_ANY:
3358	for (fi = min;; fi++)
3359	{
3360	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3361	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3362	if (fi >= max \|\| eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3363	GETCHARINC(c, eptr);
3364	if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3365	}
3366	/* Control never gets here */
3367
3368	case PT_LAMP:
3369	for (fi = min;; fi++)
3370	{
3371	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3372	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3373	if (fi >= max \|\| eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3374	GETCHARINC(c, eptr);
3375	prop_chartype = UCD_CHARTYPE(c);
3376	if ((prop_chartype == ucp_Lu \|\|
3377	prop_chartype == ucp_Ll \|\|
3378	prop_chartype == ucp_Lt) == prop_fail_result)
3379	RRETURN(MATCH_NOMATCH);
3380	}
3381	/* Control never gets here */
3382
3383	case PT_GC:
3384	for (fi = min;; fi++)
3385	{
3386	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3387	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3388	if (fi >= max \|\| eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3389	GETCHARINC(c, eptr);
3390	prop_category = UCD_CATEGORY(c);
3391	if ((prop_category == prop_value) == prop_fail_result)
3392	RRETURN(MATCH_NOMATCH);
3393	}
3394	/* Control never gets here */
3395
3396	case PT_PC:
3397	for (fi = min;; fi++)
3398	{
3399	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3400	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3401	if (fi >= max \|\| eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3402	GETCHARINC(c, eptr);
3403	prop_chartype = UCD_CHARTYPE(c);
3404	if ((prop_chartype == prop_value) == prop_fail_result)
3405	RRETURN(MATCH_NOMATCH);
3406	}
3407	/* Control never gets here */
3408
3409	case PT_SC:
3410	for (fi = min;; fi++)
3411	{
3412	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3413	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3414	if (fi >= max \|\| eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3415	GETCHARINC(c, eptr);
3416	prop_script = UCD_SCRIPT(c);
3417	if ((prop_script == prop_value) == prop_fail_result)
3418	RRETURN(MATCH_NOMATCH);
3419	}
3420	/* Control never gets here */
3421
3422	default:
3423	RRETURN(PCRE_ERROR_INTERNAL);
3424	}
3425	}
3426
3427	/* Match extended Unicode sequences. We will get here only if the
3428	support is in the binary; otherwise a compile-time error occurs. */
3429
3430	else if (ctype == OP_EXTUNI)
3431	{
3432	for (fi = min;; fi++)
3433	{
3434	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3435	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3436	if (fi >= max \|\| eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3437	GETCHARINCTEST(c, eptr);
3438	prop_category = UCD_CATEGORY(c);
3439	if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3440	while (eptr < md->end_subject)
3441	{
3442	int len = 1;
3443	if (!utf8) c = *eptr; else
3444	{
3445	GETCHARLEN(c, eptr, len);
3446	}
3447	prop_category = UCD_CATEGORY(c);
3448	if (prop_category != ucp_M) break;
3449	eptr += len;
3450	}
3451	}
3452	}
3453
3454	else
3455	#endif /* SUPPORT_UCP */
3456
3457	#ifdef SUPPORT_UTF8
3458	/* UTF-8 mode */
3459	if (utf8)
3460	{
3461	for (fi = min;; fi++)
3462	{
3463	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3464	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3465	if (fi >= max \|\| eptr >= md->end_subject \|\|
3466	(ctype == OP_ANY && IS_NEWLINE(eptr)))
3467	RRETURN(MATCH_NOMATCH);
3468
3469	GETCHARINC(c, eptr);
3470	switch(ctype)
3471	{
3472	case OP_ANY: /* This is the non-NL case */
3473	case OP_ALLANY:
3474	case OP_ANYBYTE:
3475	break;
3476
3477	case OP_ANYNL:
3478	switch(c)
3479	{
3480	default: RRETURN(MATCH_NOMATCH);
3481	case 0x000d:
3482	if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3483	break;
3484	case 0x000a:
3485	break;
3486
3487	case 0x000b:
3488	case 0x000c:
3489	case 0x0085:
3490	case 0x2028:
3491	case 0x2029:
3492	if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3493	break;
3494	}
3495	break;
3496
3497	case OP_NOT_HSPACE:
3498	switch(c)
3499	{
3500	default: break;
3501	case 0x09: /* HT */
3502	case 0x20: /* SPACE */
3503	case 0xa0: /* NBSP */
3504	case 0x1680: /* OGHAM SPACE MARK */
3505	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3506	case 0x2000: /* EN QUAD */
3507	case 0x2001: /* EM QUAD */
3508	case 0x2002: /* EN SPACE */
3509	case 0x2003: /* EM SPACE */
3510	case 0x2004: /* THREE-PER-EM SPACE */
3511	case 0x2005: /* FOUR-PER-EM SPACE */
3512	case 0x2006: /* SIX-PER-EM SPACE */
3513	case 0x2007: /* FIGURE SPACE */
3514	case 0x2008: /* PUNCTUATION SPACE */
3515	case 0x2009: /* THIN SPACE */
3516	case 0x200A: /* HAIR SPACE */
3517	case 0x202f: /* NARROW NO-BREAK SPACE */
3518	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3519	case 0x3000: /* IDEOGRAPHIC SPACE */
3520	RRETURN(MATCH_NOMATCH);
3521	}
3522	break;
3523
3524	case OP_HSPACE:
3525	switch(c)
3526	{
3527	default: RRETURN(MATCH_NOMATCH);
3528	case 0x09: /* HT */
3529	case 0x20: /* SPACE */
3530	case 0xa0: /* NBSP */
3531	case 0x1680: /* OGHAM SPACE MARK */
3532	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3533	case 0x2000: /* EN QUAD */
3534	case 0x2001: /* EM QUAD */
3535	case 0x2002: /* EN SPACE */
3536	case 0x2003: /* EM SPACE */
3537	case 0x2004: /* THREE-PER-EM SPACE */
3538	case 0x2005: /* FOUR-PER-EM SPACE */
3539	case 0x2006: /* SIX-PER-EM SPACE */
3540	case 0x2007: /* FIGURE SPACE */
3541	case 0x2008: /* PUNCTUATION SPACE */
3542	case 0x2009: /* THIN SPACE */
3543	case 0x200A: /* HAIR SPACE */
3544	case 0x202f: /* NARROW NO-BREAK SPACE */
3545	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3546	case 0x3000: /* IDEOGRAPHIC SPACE */
3547	break;
3548	}
3549	break;
3550
3551	case OP_NOT_VSPACE:
3552	switch(c)
3553	{
3554	default: break;
3555	case 0x0a: /* LF */
3556	case 0x0b: /* VT */
3557	case 0x0c: /* FF */
3558	case 0x0d: /* CR */
3559	case 0x85: /* NEL */
3560	case 0x2028: /* LINE SEPARATOR */
3561	case 0x2029: /* PARAGRAPH SEPARATOR */
3562	RRETURN(MATCH_NOMATCH);
3563	}
3564	break;
3565
3566	case OP_VSPACE:
3567	switch(c)
3568	{
3569	default: RRETURN(MATCH_NOMATCH);
3570	case 0x0a: /* LF */
3571	case 0x0b: /* VT */
3572	case 0x0c: /* FF */
3573	case 0x0d: /* CR */
3574	case 0x85: /* NEL */
3575	case 0x2028: /* LINE SEPARATOR */
3576	case 0x2029: /* PARAGRAPH SEPARATOR */
3577	break;
3578	}
3579	break;
3580
3581	case OP_NOT_DIGIT:
3582	if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3583	RRETURN(MATCH_NOMATCH);
3584	break;
3585
3586	case OP_DIGIT:
3587	if (c >= 256 \|\| (md->ctypes[c] & ctype_digit) == 0)
3588	RRETURN(MATCH_NOMATCH);
3589	break;
3590
3591	case OP_NOT_WHITESPACE:
3592	if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3593	RRETURN(MATCH_NOMATCH);
3594	break;
3595
3596	case OP_WHITESPACE:
3597	if (c >= 256 \|\| (md->ctypes[c] & ctype_space) == 0)
3598	RRETURN(MATCH_NOMATCH);
3599	break;
3600
3601	case OP_NOT_WORDCHAR:
3602	if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3603	RRETURN(MATCH_NOMATCH);
3604	break;
3605
3606	case OP_WORDCHAR:
3607	if (c >= 256 \|\| (md->ctypes[c] & ctype_word) == 0)
3608	RRETURN(MATCH_NOMATCH);
3609	break;
3610
3611	default:
3612	RRETURN(PCRE_ERROR_INTERNAL);
3613	}
3614	}
3615	}
3616	else
3617	#endif
3618	/* Not UTF-8 mode */
3619	{
3620	for (fi = min;; fi++)
3621	{
3622	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3623	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3624	if (fi >= max \|\| eptr >= md->end_subject \|\|
3625	(ctype == OP_ANY && IS_NEWLINE(eptr)))
3626	RRETURN(MATCH_NOMATCH);
3627
3628	c = *eptr++;
3629	switch(ctype)
3630	{
3631	case OP_ANY: /* This is the non-NL case */
3632	case OP_ALLANY:
3633	case OP_ANYBYTE:
3634	break;
3635
3636	case OP_ANYNL:
3637	switch(c)
3638	{
3639	default: RRETURN(MATCH_NOMATCH);
3640	case 0x000d:
3641	if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3642	break;
3643
3644	case 0x000a:
3645	break;
3646
3647	case 0x000b:
3648	case 0x000c:
3649	case 0x0085:
3650	if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3651	break;
3652	}
3653	break;
3654
3655	case OP_NOT_HSPACE:
3656	switch(c)
3657	{
3658	default: break;
3659	case 0x09: /* HT */
3660	case 0x20: /* SPACE */
3661	case 0xa0: /* NBSP */
3662	RRETURN(MATCH_NOMATCH);
3663	}
3664	break;
3665
3666	case OP_HSPACE:
3667	switch(c)
3668	{
3669	default: RRETURN(MATCH_NOMATCH);
3670	case 0x09: /* HT */
3671	case 0x20: /* SPACE */
3672	case 0xa0: /* NBSP */
3673	break;
3674	}
3675	break;
3676
3677	case OP_NOT_VSPACE:
3678	switch(c)
3679	{
3680	default: break;
3681	case 0x0a: /* LF */
3682	case 0x0b: /* VT */
3683	case 0x0c: /* FF */
3684	case 0x0d: /* CR */
3685	case 0x85: /* NEL */
3686	RRETURN(MATCH_NOMATCH);
3687	}
3688	break;
3689
3690	case OP_VSPACE:
3691	switch(c)
3692	{
3693	default: RRETURN(MATCH_NOMATCH);
3694	case 0x0a: /* LF */
3695	case 0x0b: /* VT */
3696	case 0x0c: /* FF */
3697	case 0x0d: /* CR */
3698	case 0x85: /* NEL */
3699	break;
3700	}
3701	break;
3702
3703	case OP_NOT_DIGIT:
3704	if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3705	break;
3706
3707	case OP_DIGIT:
3708	if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3709	break;
3710
3711	case OP_NOT_WHITESPACE:
3712	if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3713	break;
3714
3715	case OP_WHITESPACE:
3716	if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3717	break;
3718
3719	case OP_NOT_WORDCHAR:
3720	if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3721	break;
3722
3723	case OP_WORDCHAR:
3724	if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3725	break;
3726
3727	default:
3728	RRETURN(PCRE_ERROR_INTERNAL);
3729	}
3730	}
3731	}
3732	/* Control never gets here */
3733	}
3734
3735	/* If maximizing, it is worth using inline code for speed, doing the type
3736	test once at the start (i.e. keep it out of the loop). Again, keep the
3737	UTF-8 and UCP stuff separate. */
3738
3739	else
3740	{
3741	pp = eptr; /* Remember where we started */
3742
3743	#ifdef SUPPORT_UCP
3744	if (prop_type >= 0)
3745	{
3746	switch(prop_type)
3747	{
3748	case PT_ANY:
3749	for (i = min; i < max; i++)
3750	{
3751	int len = 1;
3752	if (eptr >= md->end_subject) break;
3753	GETCHARLEN(c, eptr, len);
3754	if (prop_fail_result) break;
3755	eptr+= len;
3756	}
3757	break;
3758
3759	case PT_LAMP:
3760	for (i = min; i < max; i++)
3761	{
3762	int len = 1;
3763	if (eptr >= md->end_subject) break;
3764	GETCHARLEN(c, eptr, len);
3765	prop_chartype = UCD_CHARTYPE(c);
3766	if ((prop_chartype == ucp_Lu \|\|
3767	prop_chartype == ucp_Ll \|\|
3768	prop_chartype == ucp_Lt) == prop_fail_result)
3769	break;
3770	eptr+= len;
3771	}
3772	break;
3773
3774	case PT_GC:
3775	for (i = min; i < max; i++)
3776	{
3777	int len = 1;
3778	if (eptr >= md->end_subject) break;
3779	GETCHARLEN(c, eptr, len);
3780	prop_category = UCD_CATEGORY(c);
3781	if ((prop_category == prop_value) == prop_fail_result)
3782	break;
3783	eptr+= len;
3784	}
3785	break;
3786
3787	case PT_PC:
3788	for (i = min; i < max; i++)
3789	{
3790	int len = 1;
3791	if (eptr >= md->end_subject) break;
3792	GETCHARLEN(c, eptr, len);
3793	prop_chartype = UCD_CHARTYPE(c);
3794	if ((prop_chartype == prop_value) == prop_fail_result)
3795	break;
3796	eptr+= len;
3797	}
3798	break;
3799
3800	case PT_SC:
3801	for (i = min; i < max; i++)
3802	{
3803	int len = 1;
3804	if (eptr >= md->end_subject) break;
3805	GETCHARLEN(c, eptr, len);
3806	prop_script = UCD_SCRIPT(c);
3807	if ((prop_script == prop_value) == prop_fail_result)
3808	break;
3809	eptr+= len;
3810	}
3811	break;
3812	}
3813
3814	/* eptr is now past the end of the maximum run */
3815
3816	if (possessive) continue;
3817	for(;;)
3818	{
3819	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3820	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3821	if (eptr-- == pp) break; /* Stop if tried at original pos */
3822	if (utf8) BACKCHAR(eptr);
3823	}
3824	}
3825
3826	/* Match extended Unicode sequences. We will get here only if the
3827	support is in the binary; otherwise a compile-time error occurs. */
3828
3829	else if (ctype == OP_EXTUNI)
3830	{
3831	for (i = min; i < max; i++)
3832	{
3833	if (eptr >= md->end_subject) break;
3834	GETCHARINCTEST(c, eptr);
3835	prop_category = UCD_CATEGORY(c);
3836	if (prop_category == ucp_M) break;
3837	while (eptr < md->end_subject)
3838	{
3839	int len = 1;
3840	if (!utf8) c = *eptr; else
3841	{
3842	GETCHARLEN(c, eptr, len);
3843	}
3844	prop_category = UCD_CATEGORY(c);
3845	if (prop_category != ucp_M) break;
3846	eptr += len;
3847	}
3848	}
3849
3850	/* eptr is now past the end of the maximum run */
3851
3852	if (possessive) continue;
3853	for(;;)
3854	{
3855	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3856	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3857	if (eptr-- == pp) break; /* Stop if tried at original pos */
3858	for (;;) /* Move back over one extended */
3859	{
3860	int len = 1;
3861	if (!utf8) c = *eptr; else
3862	{
3863	BACKCHAR(eptr);
3864	GETCHARLEN(c, eptr, len);
3865	}
3866	prop_category = UCD_CATEGORY(c);
3867	if (prop_category != ucp_M) break;
3868	eptr--;
3869	}
3870	}
3871	}
3872
3873	else
3874	#endif /* SUPPORT_UCP */
3875
3876	#ifdef SUPPORT_UTF8
3877	/* UTF-8 mode */
3878
3879	if (utf8)
3880	{
3881	switch(ctype)
3882	{
3883	case OP_ANY:
3884	if (max < INT_MAX)
3885	{
3886	for (i = min; i < max; i++)
3887	{
3888	if (eptr >= md->end_subject \|\| IS_NEWLINE(eptr)) break;
3889	eptr++;
3890	while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3891	}
3892	}
3893
3894	/* Handle unlimited UTF-8 repeat */
3895
3896	else
3897	{
3898	for (i = min; i < max; i++)
3899	{
3900	if (eptr >= md->end_subject \|\| IS_NEWLINE(eptr)) break;
3901	eptr++;
3902	while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3903	}
3904	}
3905	break;
3906
3907	case OP_ALLANY:
3908	if (max < INT_MAX)
3909	{
3910	for (i = min; i < max; i++)
3911	{
3912	if (eptr >= md->end_subject) break;
3913	eptr++;
3914	while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3915	}
3916	}
3917	else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
3918	break;
3919
3920	/* The byte case is the same as non-UTF8 */
3921
3922	case OP_ANYBYTE:
3923	c = max - min;
3924	if (c > (unsigned int)(md->end_subject - eptr))
3925	c = md->end_subject - eptr;
3926	eptr += c;
3927	break;
3928
3929	case OP_ANYNL:
3930	for (i = min; i < max; i++)
3931	{
3932	int len = 1;
3933	if (eptr >= md->end_subject) break;
3934	GETCHARLEN(c, eptr, len);
3935	if (c == 0x000d)
3936	{
3937	if (++eptr >= md->end_subject) break;
3938	if (*eptr == 0x000a) eptr++;
3939	}
3940	else
3941	{
3942	if (c != 0x000a &&
3943	(md->bsr_anycrlf \|\|
3944	(c != 0x000b && c != 0x000c &&
3945	c != 0x0085 && c != 0x2028 && c != 0x2029)))
3946	break;
3947	eptr += len;
3948	}
3949	}
3950	break;
3951
3952	case OP_NOT_HSPACE:
3953	case OP_HSPACE:
3954	for (i = min; i < max; i++)
3955	{
3956	BOOL gotspace;
3957	int len = 1;
3958	if (eptr >= md->end_subject) break;
3959	GETCHARLEN(c, eptr, len);
3960	switch(c)
3961	{
3962	default: gotspace = FALSE; break;
3963	case 0x09: /* HT */
3964	case 0x20: /* SPACE */
3965	case 0xa0: /* NBSP */
3966	case 0x1680: /* OGHAM SPACE MARK */
3967	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3968	case 0x2000: /* EN QUAD */
3969	case 0x2001: /* EM QUAD */
3970	case 0x2002: /* EN SPACE */
3971	case 0x2003: /* EM SPACE */
3972	case 0x2004: /* THREE-PER-EM SPACE */
3973	case 0x2005: /* FOUR-PER-EM SPACE */
3974	case 0x2006: /* SIX-PER-EM SPACE */
3975	case 0x2007: /* FIGURE SPACE */
3976	case 0x2008: /* PUNCTUATION SPACE */
3977	case 0x2009: /* THIN SPACE */
3978	case 0x200A: /* HAIR SPACE */
3979	case 0x202f: /* NARROW NO-BREAK SPACE */
3980	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3981	case 0x3000: /* IDEOGRAPHIC SPACE */
3982	gotspace = TRUE;
3983	break;
3984	}
3985	if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3986	eptr += len;
3987	}
3988	break;
3989
3990	case OP_NOT_VSPACE:
3991	case OP_VSPACE:
3992	for (i = min; i < max; i++)
3993	{
3994	BOOL gotspace;
3995	int len = 1;
3996	if (eptr >= md->end_subject) break;
3997	GETCHARLEN(c, eptr, len);
3998	switch(c)
3999	{
4000	default: gotspace = FALSE; break;
4001	case 0x0a: /* LF */
4002	case 0x0b: /* VT */
4003	case 0x0c: /* FF */
4004	case 0x0d: /* CR */
4005	case 0x85: /* NEL */
4006	case 0x2028: /* LINE SEPARATOR */
4007	case 0x2029: /* PARAGRAPH SEPARATOR */
4008	gotspace = TRUE;
4009	break;
4010	}
4011	if (gotspace == (ctype == OP_NOT_VSPACE)) break;
4012	eptr += len;
4013	}
4014	break;
4015
4016	case OP_NOT_DIGIT:
4017	for (i = min; i < max; i++)
4018	{
4019	int len = 1;
4020	if (eptr >= md->end_subject) break;
4021	GETCHARLEN(c, eptr, len);
4022	if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
4023	eptr+= len;
4024	}
4025	break;
4026
4027	case OP_DIGIT:
4028	for (i = min; i < max; i++)
4029	{
4030	int len = 1;
4031	if (eptr >= md->end_subject) break;
4032	GETCHARLEN(c, eptr, len);
4033	if (c >= 256 \|\|(md->ctypes[c] & ctype_digit) == 0) break;
4034	eptr+= len;
4035	}
4036	break;
4037
4038	case OP_NOT_WHITESPACE:
4039	for (i = min; i < max; i++)
4040	{
4041	int len = 1;
4042	if (eptr >= md->end_subject) break;
4043	GETCHARLEN(c, eptr, len);
4044	if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
4045	eptr+= len;
4046	}
4047	break;
4048
4049	case OP_WHITESPACE:
4050	for (i = min; i < max; i++)
4051	{
4052	int len = 1;
4053	if (eptr >= md->end_subject) break;
4054	GETCHARLEN(c, eptr, len);
4055	if (c >= 256 \|\|(md->ctypes[c] & ctype_space) == 0) break;
4056	eptr+= len;
4057	}
4058	break;
4059
4060	case OP_NOT_WORDCHAR:
4061	for (i = min; i < max; i++)
4062	{
4063	int len = 1;
4064	if (eptr >= md->end_subject) break;
4065	GETCHARLEN(c, eptr, len);
4066	if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
4067	eptr+= len;
4068	}
4069	break;
4070
4071	case OP_WORDCHAR:
4072	for (i = min; i < max; i++)
4073	{
4074	int len = 1;
4075	if (eptr >= md->end_subject) break;
4076	GETCHARLEN(c, eptr, len);
4077	if (c >= 256 \|\| (md->ctypes[c] & ctype_word) == 0) break;
4078	eptr+= len;
4079	}
4080	break;
4081
4082	default:
4083	RRETURN(PCRE_ERROR_INTERNAL);
4084	}
4085
4086	/* eptr is now past the end of the maximum run */
4087
4088	if (possessive) continue;
4089	for(;;)
4090	{
4091	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4092	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4093	if (eptr-- == pp) break; /* Stop if tried at original pos */
4094	BACKCHAR(eptr);
4095	}
4096	}
4097	else
4098	#endif /* SUPPORT_UTF8 */
4099
4100	/* Not UTF-8 mode */
4101	{
4102	switch(ctype)
4103	{
4104	case OP_ANY:
4105	for (i = min; i < max; i++)
4106	{
4107	if (eptr >= md->end_subject \|\| IS_NEWLINE(eptr)) break;
4108	eptr++;
4109	}
4110	break;
4111
4112	case OP_ALLANY:
4113	case OP_ANYBYTE:
4114	c = max - min;
4115	if (c > (unsigned int)(md->end_subject - eptr))
4116	c = md->end_subject - eptr;
4117	eptr += c;
4118	break;
4119
4120	case OP_ANYNL:
4121	for (i = min; i < max; i++)
4122	{
4123	if (eptr >= md->end_subject) break;
4124	c = *eptr;
4125	if (c == 0x000d)
4126	{
4127	if (++eptr >= md->end_subject) break;
4128	if (*eptr == 0x000a) eptr++;
4129	}
4130	else
4131	{
4132	if (c != 0x000a &&
4133	(md->bsr_anycrlf \|\|
4134	(c != 0x000b && c != 0x000c && c != 0x0085)))
4135	break;
4136	eptr++;
4137	}
4138	}
4139	break;
4140
4141	case OP_NOT_HSPACE:
4142	for (i = min; i < max; i++)
4143	{
4144	if (eptr >= md->end_subject) break;
4145	c = *eptr;
4146	if (c == 0x09 \|\| c == 0x20 \|\| c == 0xa0) break;
4147	eptr++;
4148	}
4149	break;
4150
4151	case OP_HSPACE:
4152	for (i = min; i < max; i++)
4153	{
4154	if (eptr >= md->end_subject) break;
4155	c = *eptr;
4156	if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4157	eptr++;
4158	}
4159	break;
4160
4161	case OP_NOT_VSPACE:
4162	for (i = min; i < max; i++)
4163	{
4164	if (eptr >= md->end_subject) break;
4165	c = *eptr;
4166	if (c == 0x0a \|\| c == 0x0b \|\| c == 0x0c \|\| c == 0x0d \|\| c == 0x85)
4167	break;
4168	eptr++;
4169	}
4170	break;
4171
4172	case OP_VSPACE:
4173	for (i = min; i < max; i++)
4174	{
4175	if (eptr >= md->end_subject) break;
4176	c = *eptr;
4177	if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4178	break;
4179	eptr++;
4180	}
4181	break;
4182
4183	case OP_NOT_DIGIT:
4184	for (i = min; i < max; i++)
4185	{
4186	if (eptr >= md->end_subject \|\| (md->ctypes[*eptr] & ctype_digit) != 0)
4187	break;
4188	eptr++;
4189	}
4190	break;
4191
4192	case OP_DIGIT:
4193	for (i = min; i < max; i++)
4194	{
4195	if (eptr >= md->end_subject \|\| (md->ctypes[*eptr] & ctype_digit) == 0)
4196	break;
4197	eptr++;
4198	}
4199	break;
4200
4201	case OP_NOT_WHITESPACE:
4202	for (i = min; i < max; i++)
4203	{
4204	if (eptr >= md->end_subject \|\| (md->ctypes[*eptr] & ctype_space) != 0)
4205	break;
4206	eptr++;
4207	}
4208	break;
4209
4210	case OP_WHITESPACE:
4211	for (i = min; i < max; i++)
4212	{
4213	if (eptr >= md->end_subject \|\| (md->ctypes[*eptr] & ctype_space) == 0)
4214	break;
4215	eptr++;
4216	}
4217	break;
4218
4219	case OP_NOT_WORDCHAR:
4220	for (i = min; i < max; i++)
4221	{
4222	if (eptr >= md->end_subject \|\| (md->ctypes[*eptr] & ctype_word) != 0)
4223	break;
4224	eptr++;
4225	}
4226	break;
4227
4228	case OP_WORDCHAR:
4229	for (i = min; i < max; i++)
4230	{
4231	if (eptr >= md->end_subject \|\| (md->ctypes[*eptr] & ctype_word) == 0)
4232	break;
4233	eptr++;
4234	}
4235	break;
4236
4237	default:
4238	RRETURN(PCRE_ERROR_INTERNAL);
4239	}
4240
4241	/* eptr is now past the end of the maximum run */
4242
4243	if (possessive) continue;
4244	while (eptr >= pp)
4245	{
4246	RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4247	eptr--;
4248	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4249	}
4250	}
4251
4252	/* Get here if we can't make it match with any permitted repetitions */
4253
4254	RRETURN(MATCH_NOMATCH);
4255	}
4256	/* Control never gets here */
4257
4258	/* There's been some horrible disaster. Arrival here can only mean there is
4259	something seriously wrong in the code above or the OP_xxx definitions. */
4260
4261	default:
4262	DPRINTF(("Unknown opcode %d\n", *ecode));
4263	RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4264	}
4265
4266	/* Do not stick any code in here without much thought; it is assumed
4267	that "continue" in the code above comes out to here to repeat the main
4268	loop. */
4269
4270	} /* End of main loop */
4271	/* Control never reaches here */
4272
4273
4274	/* When compiling to use the heap rather than the stack for recursive calls to
4275	match(), the RRETURN() macro jumps here. The number that is saved in
4276	frame->Xwhere indicates which label we actually want to return to. */
4277
4278	#ifdef NO_RECURSE
4279	#define LBL(val) case val: goto L_RM##val;
4280	HEAP_RETURN:
4281	switch (frame->Xwhere)
4282	{
4283	LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4284	LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4285	LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4286	LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4287	LBL(53) LBL(54)
4288	#ifdef SUPPORT_UTF8
4289	LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4290	LBL(32) LBL(34) LBL(42) LBL(46)
4291	#ifdef SUPPORT_UCP
4292	LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4293	#endif /* SUPPORT_UCP */
4294	#endif /* SUPPORT_UTF8 */
4295	default:
4296	DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4297	return PCRE_ERROR_INTERNAL;
4298	}
4299	#undef LBL
4300	#endif /* NO_RECURSE */
4301	}
4302
4303
4304	/***************************************************************************
4305	****************************************************************************
4306	RECURSION IN THE match() FUNCTION
4307
4308	Undefine all the macros that were defined above to handle this. */
4309
4310	#ifdef NO_RECURSE /* these names are undefined only in the NO_RECURSE (heap rather than stack) build of match() */
4311	#undef eptr
4312	#undef ecode
4313	#undef mstart
4314	#undef offset_top
4315	#undef ims
4316	#undef eptrb
4317	#undef flags
4318
4319	#undef callpat
4320	#undef charptr
4321	#undef data
4322	#undef next
4323	#undef pp
4324	#undef prev
4325	#undef saved_eptr
4326
4327	#undef new_recursive
4328
4329	#undef cur_is_word
4330	#undef condition
4331	#undef prev_is_word
4332
4333	#undef original_ims
4334
4335	#undef ctype
4336	#undef length
4337	#undef max
4338	#undef min
4339	#undef number
4340	#undef offset
4341	#undef op
4342	#undef save_capture_last
4343	#undef save_offset1
4344	#undef save_offset2
4345	#undef save_offset3
4346	#undef stacksave
4347
4348	#undef newptrb
4349
4350	#endif /* NO_RECURSE */
4351
4352	/* fc and fi are defined as macros in both cases (with and without NO_RECURSE), so they are undefined here, outside the conditional above */
4353
4354	#undef fc
4355	#undef fi
4356
4357	/***************************************************************************
4358	***************************************************************************/
4359
4360
4361
4362	/*************************************************
4363	* Execute a Regular Expression *
4364	*************************************************/
4365
4366	/* This function applies a compiled re to a subject string and picks out
4367	portions of the string if it matches. Two elements in the vector are set for
4368	each substring: the offsets to the start and end of the substring.
4369
4370	Arguments:
4371	argument_re points to the compiled expression
4372	extra_data points to extra data or is NULL
4373	subject points to the subject string
4374	length length of subject string (may contain binary zeros)
4375	start_offset where to start in the subject string
4376	options option bits
4377	offsets points to a vector of ints to be filled in with offsets
4378	offsetcount the number of elements in the vector
4379
4380	Returns: > 0 => success; value is the number of elements filled in
4381	= 0 => success, but offsets is not big enough
4382	-1 => failed to match
4383	< -1 => some kind of unexpected problem
4384	*/
4385
4386	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4387	pcre_exec(const pcre argument_re, const pcre_extra extra_data,
4388	PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4389	int offsetcount)
4390	{
4391	int rc, resetcount, ocount;
4392	int first_byte = -1;
4393	int req_byte = -1;
4394	int req_byte2 = -1;
4395	int newline;
4396	unsigned long int ims;
4397	BOOL using_temporary_offsets = FALSE;
4398	BOOL anchored;
4399	BOOL startline;
4400	BOOL firstline;
4401	BOOL first_byte_caseless = FALSE;
4402	BOOL req_byte_caseless = FALSE;
4403	BOOL utf8;
4404	match_data match_block;
4405	match_data *md = &match_block;
4406	const uschar *tables;
4407	const uschar *start_bits = NULL;
4408	USPTR start_match = (USPTR)subject + start_offset;
4409	USPTR end_subject;
4410	USPTR req_byte_ptr = start_match - 1;
4411
4412	pcre_study_data internal_study;
4413	const pcre_study_data *study;
4414
4415	real_pcre internal_re;
4416	const real_pcre external_re = (const real_pcre )argument_re;
4417	const real_pcre *re = external_re;
4418
4419	/* Plausibility checks */
4420
4421	if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4422	if (re == NULL \|\| subject == NULL \|\|
4423	(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4424	if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
4425
4426	/* Fish out the optional data from the extra_data structure, first setting
4427	the default values. */
4428
4429	study = NULL;
4430	md->match_limit = MATCH_LIMIT;
4431	md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4432	md->callout_data = NULL;
4433
4434	/* The table pointer is always in native byte order. */
4435
4436	tables = external_re->tables;
4437
4438	if (extra_data != NULL)
4439	{
4440	register unsigned int flags = extra_data->flags;
4441	if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4442	study = (const pcre_study_data *)extra_data->study_data;
4443	if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4444	md->match_limit = extra_data->match_limit;
4445	if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4446	md->match_limit_recursion = extra_data->match_limit_recursion;
4447	if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4448	md->callout_data = extra_data->callout_data;
4449	if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4450	}
4451
4452	/* If the exec call supplied NULL for tables, use the inbuilt ones. This
4453	is a feature that makes it possible to save compiled regex and re-use them
4454	in other programs later. */
4455
4456	if (tables == NULL) tables = _pcre_default_tables;
4457
4458	/* Check that the first field in the block is the magic number. If it is not,
4459	test for a regex that was compiled on a host of opposite endianness. If this is
4460	the case, flipped values are put in internal_re and internal_study if there was
4461	study data too. */
4462
4463	if (re->magic_number != MAGIC_NUMBER)
4464	{
4465	re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
4466	if (re == NULL) return PCRE_ERROR_BADMAGIC;
4467	if (study != NULL) study = &internal_study;
4468	}
4469
4470	/* Set up other data */
4471
4472	anchored = ((re->options \| options) & PCRE_ANCHORED) != 0;
4473	startline = (re->flags & PCRE_STARTLINE) != 0;
4474	firstline = (re->options & PCRE_FIRSTLINE) != 0;
4475
4476	/* The code starts after the real_pcre block and the capture name table. */
4477
4478	md->start_code = (const uschar *)external_re + re->name_table_offset +
4479	re->name_count * re->name_entry_size;
4480
4481	md->start_subject = (USPTR)subject;
4482	md->start_offset = start_offset;
4483	md->end_subject = md->start_subject + length;
4484	end_subject = md->end_subject;
4485
4486	md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4487	utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4488	md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4489
4490	md->notbol = (options & PCRE_NOTBOL) != 0;
4491	md->noteol = (options & PCRE_NOTEOL) != 0;
4492	md->notempty = (options & PCRE_NOTEMPTY) != 0;
4493	md->partial = (options & PCRE_PARTIAL) != 0;
4494	md->hitend = FALSE;
4495
4496	md->recursive = NULL; /* No recursion at top level */
4497
4498	md->lcc = tables + lcc_offset;
4499	md->ctypes = tables + ctypes_offset;
4500
4501	/* Handle different \R options. */
4502
4503	switch (options & (PCRE_BSR_ANYCRLF\|PCRE_BSR_UNICODE))
4504	{
4505	case 0:
4506	if ((re->options & (PCRE_BSR_ANYCRLF\|PCRE_BSR_UNICODE)) != 0)
4507	md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
4508	else
4509	#ifdef BSR_ANYCRLF
4510	md->bsr_anycrlf = TRUE;
4511	#else
4512	md->bsr_anycrlf = FALSE;
4513	#endif
4514	break;
4515
4516	case PCRE_BSR_ANYCRLF:
4517	md->bsr_anycrlf = TRUE;
4518	break;
4519
4520	case PCRE_BSR_UNICODE:
4521	md->bsr_anycrlf = FALSE;
4522	break;
4523
4524	default: return PCRE_ERROR_BADNEWLINE;
4525	}
4526
4527	/* Handle different types of newline. The three bits give eight cases. If
4528	nothing is set at run time, whatever was used at compile time applies. */
4529
4530	switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
4531	(pcre_uint32)options) & PCRE_NEWLINE_BITS)
4532	{
4533	case 0: newline = NEWLINE; break; /* Compile-time default */
4534	case PCRE_NEWLINE_CR: newline = '\r'; break;
4535	case PCRE_NEWLINE_LF: newline = '\n'; break;
4536	case PCRE_NEWLINE_CR+
4537	PCRE_NEWLINE_LF: newline = ('\r' << 8) \| '\n'; break;
4538	case PCRE_NEWLINE_ANY: newline = -1; break;
4539	case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4540	default: return PCRE_ERROR_BADNEWLINE;
4541	}
4542
4543	if (newline == -2)
4544	{
4545	md->nltype = NLTYPE_ANYCRLF;
4546	}
4547	else if (newline < 0)
4548	{
4549	md->nltype = NLTYPE_ANY;
4550	}
4551	else
4552	{
4553	md->nltype = NLTYPE_FIXED;
4554	if (newline > 255)
4555	{
4556	md->nllen = 2;
4557	md->nl[0] = (newline >> 8) & 255;
4558	md->nl[1] = newline & 255;
4559	}
4560	else
4561	{
4562	md->nllen = 1;
4563	md->nl[0] = newline;
4564	}
4565	}
4566
4567	/* Partial matching is supported only for a restricted set of regexes at the
4568	moment. */
4569
4570	if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
4571	return PCRE_ERROR_BADPARTIAL;
4572
4573	/* Check a UTF-8 string if required. Unfortunately there's no way of passing
4574	back the character offset. */
4575
4576	#ifdef SUPPORT_UTF8
4577	if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4578	{
4579	if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4580	return PCRE_ERROR_BADUTF8;
4581	if (start_offset > 0 && start_offset < length)
4582	{
4583	int tb = ((uschar *)subject)[start_offset];
4584	if (tb > 127)
4585	{
4586	tb &= 0xc0;
4587	if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
4588	}
4589	}
4590	}
4591	#endif
4592
4593	/* The ims options can vary during the matching as a result of the presence
4594	of (?ims) items in the pattern. They are kept in a local variable so that
4595	restoring at the exit of a group is easy. */
4596
4597	ims = re->options & (PCRE_CASELESS\|PCRE_MULTILINE\|PCRE_DOTALL);
4598
4599	/* If the expression has got more back references than the offsets supplied can
4600	hold, we get a temporary chunk of working store to use during the matching.
4601	Otherwise, we can use the vector supplied, rounding down its size to a multiple
4602	of 3. */
4603
4604	ocount = offsetcount - (offsetcount % 3);
4605
4606	if (re->top_backref > 0 && re->top_backref >= ocount/3)
4607	{
4608	ocount = re->top_backref * 3 + 3;
4609	md->offset_vector = (int )(pcre_malloc)(ocount sizeof(int));
4610	if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4611	using_temporary_offsets = TRUE;
4612	DPRINTF(("Got memory to hold back references\n"));
4613	}
4614	else md->offset_vector = offsets;
4615
4616	md->offset_end = ocount;
4617	md->offset_max = (2*ocount)/3;
4618	md->offset_overflow = FALSE;
4619	md->capture_last = -1;
4620
4621	/* Compute the minimum number of offsets that we need to reset each time. Doing
4622	this makes a huge difference to execution time when there aren't many brackets
4623	in the pattern. */
4624
4625	resetcount = 2 + re->top_bracket * 2;
4626	if (resetcount > offsetcount) resetcount = ocount;
4627
4628	/* Reset the working variables associated with each extraction. These should
4629	never be used unless previously set, but they get saved and restored, and so we
4630	initialize them to avoid reading uninitialized locations. */
4631
4632	if (md->offset_vector != NULL)
4633	{
4634	register int *iptr = md->offset_vector + ocount;
4635	register int *iend = iptr - resetcount/2 + 1;
4636	while (--iptr >= iend) *iptr = -1;
4637	}
4638
4639	/* Set up the first character to match, if available. The first_byte value is
4640	never set for an anchored regular expression, but the anchoring may be forced
4641	at run time, so we have to test for anchoring. The first char may be unset for
4642	an unanchored pattern, of course. If there's no first char and the pattern was
4643	studied, there may be a bitmap of possible first characters. */
4644
4645	if (!anchored)
4646	{
4647	if ((re->flags & PCRE_FIRSTSET) != 0)
4648	{
4649	first_byte = re->first_byte & 255;
4650	if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4651	first_byte = md->lcc[first_byte];
4652	}
4653	else
4654	if (!startline && study != NULL &&
4655	(study->options & PCRE_STUDY_MAPPED) != 0)
4656	start_bits = study->start_bits;
4657	}
4658
4659	/* For anchored or unanchored matches, there may be a "last known required
4660	character" set. */
4661
4662	if ((re->flags & PCRE_REQCHSET) != 0)
4663	{
4664	req_byte = re->req_byte & 255;
4665	req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
4666	req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
4667	}
4668
4669
4670	/* ==========================================================================*/
4671
4672	/* Loop for handling unanchored repeated matching attempts; for anchored regexes
4673	the loop runs just once. */
4674
4675	for(;;)
4676	{
4677	USPTR save_end_subject = end_subject;
4678	USPTR new_start_match;
4679
4680	/* Reset the maximum number of extractions we might see. */
4681
4682	if (md->offset_vector != NULL)
4683	{
4684	register int *iptr = md->offset_vector;
4685	register int *iend = iptr + resetcount;
4686	while (iptr < iend) *iptr++ = -1;
4687	}
4688
4689	/* Advance to a unique first char if possible. If firstline is TRUE, the
4690	start of the match is constrained to the first line of a multiline string.
4691	That is, the match must start before or at the first newline. Implement this
4692	by temporarily adjusting end_subject so that we stop scanning at a newline. If
4693	the match fails at the newline, later code breaks this loop. */
4694
4695	if (firstline)
4696	{
4697	USPTR t = start_match;
4698	#ifdef SUPPORT_UTF8
4699	if (utf8)
4700	{
4701	while (t < md->end_subject && !IS_NEWLINE(t))
4702	{
4703	t++;
4704	while (t < end_subject && (*t & 0xc0) == 0x80) t++;
4705	}
4706	}
4707	else
4708	#endif
4709	while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4710	end_subject = t;
4711	}
4712
4713	/* Now advance to a unique first byte if there is one. */
4714
4715	if (first_byte >= 0)
4716	{
4717	if (first_byte_caseless)
4718	while (start_match < end_subject && md->lcc[*start_match] != first_byte)
4719	start_match++;
4720	else
4721	while (start_match < end_subject && *start_match != first_byte)
4722	start_match++;
4723	}
4724
4725	/* Or to just after a linebreak for a multiline match */
4726
4727	else if (startline)
4728	{
4729	if (start_match > md->start_subject + start_offset)
4730	{
4731	#ifdef SUPPORT_UTF8
4732	if (utf8)
4733	{
4734	while (start_match < end_subject && !WAS_NEWLINE(start_match))
4735	{
4736	start_match++;
4737	while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4738	start_match++;
4739	}
4740	}
4741	else
4742	#endif
4743	while (start_match < end_subject && !WAS_NEWLINE(start_match))
4744	start_match++;
4745
4746	/* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4747	and we are now at a LF, advance the match position by one more character.
4748	*/
4749
4750	if (start_match[-1] == '\r' &&
4751	(md->nltype == NLTYPE_ANY \|\| md->nltype == NLTYPE_ANYCRLF) &&
4752	start_match < end_subject &&
4753	*start_match == '\n')
4754	start_match++;
4755	}
4756	}
4757
4758	/* Or to a non-unique first byte after study */
4759
4760	else if (start_bits != NULL)
4761	{
4762	while (start_match < end_subject)
4763	{
4764	register unsigned int c = *start_match;
4765	if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4766	else break;
4767	}
4768	}
4769
4770	/* Restore fudged end_subject */
4771
4772	end_subject = save_end_subject;
4773
4774	#ifdef DEBUG /* Sigh. Some compilers never learn. */
4775	printf(">>>> Match against: ");
4776	pchars(start_match, end_subject - start_match, TRUE, md);
4777	printf("\n");
4778	#endif
4779
4780	/* If req_byte is set, we know that that character must appear in the subject
4781	for the match to succeed. If the first character is set, req_byte must be
4782	later in the subject; otherwise the test starts at the match point. This
4783	optimization can save a huge amount of backtracking in patterns with nested
4784	unlimited repeats that aren't going to match. Writing separate code for
4785	cased/caseless versions makes it go faster, as does using an autoincrement
4786	and backing off on a match.
4787
4788	HOWEVER: when the subject string is very, very long, searching to its end can
4789	take a long time, and give bad performance on quite ordinary patterns. This
4790	showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4791	string... so we don't do this when the string is sufficiently long.
4792
4793	ALSO: this processing is disabled when partial matching is requested.
4794	*/
4795
4796	if (req_byte >= 0 &&
4797	end_subject - start_match < REQ_BYTE_MAX &&
4798	!md->partial)
4799	{
4800	register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4801
4802	/* We don't need to repeat the search if we haven't yet reached the
4803	place we found it at last time. */
4804
4805	if (p > req_byte_ptr)
4806	{
4807	if (req_byte_caseless)
4808	{
4809	while (p < end_subject)
4810	{
4811	register int pp = *p++;
4812	if (pp == req_byte \|\| pp == req_byte2) { p--; break; }
4813	}
4814	}
4815	else
4816	{
4817	while (p < end_subject)
4818	{
4819	if (*p++ == req_byte) { p--; break; }
4820	}
4821	}
4822
4823	/* If we can't find the required character, break the matching loop,
4824	forcing a match failure. */
4825
4826	if (p >= end_subject)
4827	{
4828	rc = MATCH_NOMATCH;
4829	break;
4830	}
4831
4832	/* If we have found the required character, save the point where we
4833	found it, so that we don't search again next time round the loop if
4834	the start hasn't passed this character yet. */
4835
4836	req_byte_ptr = p;
4837	}
4838	}
4839
4840	/* OK, we can now run the match. */
4841
4842	md->start_match_ptr = start_match;
4843	md->match_call_count = 0;
4844	rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4845
4846	switch(rc)
4847	{
4848	/* NOMATCH and PRUNE advance by one character. THEN at this level acts
4849	exactly like PRUNE. */
4850
4851	case MATCH_NOMATCH:
4852	case MATCH_PRUNE:
4853	case MATCH_THEN:
4854	new_start_match = start_match + 1;
4855	#ifdef SUPPORT_UTF8
4856	if (utf8)
4857	while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4858	new_start_match++;
4859	#endif
4860	break;
4861
4862	/* SKIP passes back the next starting point explicitly. */
4863
4864	case MATCH_SKIP:
4865	new_start_match = md->start_match_ptr;
4866	break;
4867
4868	/* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4869
4870	case MATCH_COMMIT:
4871	rc = MATCH_NOMATCH;
4872	goto ENDLOOP;
4873
4874	/* Any other return is some kind of error. */
4875
4876	default:
4877	goto ENDLOOP;
4878	}
4879
4880	/* Control reaches here for the various types of "no match at this point"
4881	result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4882
4883	rc = MATCH_NOMATCH;
4884
4885	/* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4886	newline in the subject (though it may continue over the newline). Therefore,
4887	if we have just failed to match, starting at a newline, do not continue. */
4888
4889	if (firstline && IS_NEWLINE(start_match)) break;
4890
4891	/* Advance to new matching position */
4892
4893	start_match = new_start_match;
4894
4895	/* Break the loop if the pattern is anchored or if we have passed the end of
4896	the subject. */
4897
4898	if (anchored \|\| start_match > end_subject) break;
4899
4900	/* If we have just passed a CR and we are now at a LF, and the pattern does
4901	not contain any explicit matches for \r or \n, and the newline option is CRLF
4902	or ANY or ANYCRLF, advance the match position by one more character. */
4903
4904	if (start_match[-1] == '\r' &&
4905	start_match < end_subject &&
4906	*start_match == '\n' &&
4907	(re->flags & PCRE_HASCRORLF) == 0 &&
4908	(md->nltype == NLTYPE_ANY \|\|
4909	md->nltype == NLTYPE_ANYCRLF \|\|
4910	md->nllen == 2))
4911	start_match++;
4912
4913	} /* End of for(;;) "bumpalong" loop */
4914
4915	/* ==========================================================================*/
4916
4917	/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4918	conditions is true:
4919
4920	(1) The pattern is anchored or the match was failed by (*COMMIT);
4921
4922	(2) We are past the end of the subject;
4923
4924	(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4925	this option requests that a match occur at or before the first newline in
4926	the subject.
4927
4928	When we have a match and the offset vector is big enough to deal with any
4929	backreferences, captured substring offsets will already be set up. In the case
4930	where we had to get some local store to hold offsets for backreference
4931	processing, copy those that we can. In this case there need not be overflow if
4932	certain parts of the pattern were not used, even though there are more
4933	capturing parentheses than vector slots. */
4934
4935	ENDLOOP:
4936
4937	if (rc == MATCH_MATCH)
4938	{
4939	if (using_temporary_offsets)
4940	{
4941	if (offsetcount >= 4)
4942	{
4943	memcpy(offsets + 2, md->offset_vector + 2,
4944	(offsetcount - 2) * sizeof(int));
4945	DPRINTF(("Copied offsets from temporary memory\n"));
4946	}
4947	if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4948	DPRINTF(("Freeing temporary memory\n"));
4949	(pcre_free)(md->offset_vector);
4950	}
4951
4952	/* Set the return code to the number of captured strings, or 0 if there are
4953	too many to fit into the vector. */
4954
4955	rc = md->offset_overflow? 0 : md->end_offset_top/2;
4956
4957	/* If there is space, set up the whole thing as substring 0. The value of
4958	md->start_match_ptr might be modified if \K was encountered on the successful
4959	matching path. */
4960
4961	if (offsetcount < 2) rc = 0; else
4962	{
4963	offsets[0] = md->start_match_ptr - md->start_subject;
4964	offsets[1] = md->end_match_ptr - md->start_subject;
4965	}
4966
4967	DPRINTF((">>>> returning %d\n", rc));
4968	return rc;
4969	}
4970
4971	/* Control gets here if there has been an error, or if the overall match
4972	attempt has failed at all permitted starting positions. */
4973
4974	if (using_temporary_offsets)
4975	{
4976	DPRINTF(("Freeing temporary memory\n"));
4977	(pcre_free)(md->offset_vector);
4978	}
4979
4980	if (rc != MATCH_NOMATCH)
4981	{
4982	DPRINTF((">>>> error: returning %d\n", rc));
4983	return rc;
4984	}
4985	else if (md->partial && md->hitend)
4986	{
4987	DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4988	return PCRE_ERROR_PARTIAL;
4989	}
4990	else
4991	{
4992	DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4993	return PCRE_ERROR_NOMATCH;
4994	}
4995	}
4996
4997	/* End of pcre_exec.c */

Note: See TracBrowser for help on using the repository browser.

Download in other formats: