~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/*
2
 * The matching engine and friends.  This file is #included by regexec.c
3
 * after suitable #defines of a variety of macros used herein, so that
4
 * different state representations can be used without duplicating masses
5
 * of code.
6
 */
7
8
#ifdef SNAMES
9
#define	matcher	smatcher
10
#define	fast	sfast
11
#define	slow	sslow
12
#define	dissect	sdissect
13
#define	backref	sbackref
14
#define	step	sstep
15
#define	print	sprint
16
#define	at	sat
17
#define	match	smat
18
#endif
19
#ifdef LNAMES
20
#define	matcher	lmatcher
21
#define	fast	lfast
22
#define	slow	lslow
23
#define	dissect	ldissect
24
#define	backref	lbackref
25
#define	step	lstep
26
#define	print	lprint
27
#define	at	lat
28
#define	match	lmat
29
#endif
30
31
/* another structure passed up and down to avoid zillions of parameters */
32
struct match {
33
	struct re_guts *g;
34
	int eflags;
35
	my_regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
36
	char *offp;		/* offsets work from here */
37
	char *beginp;		/* start of string -- virtual NUL precedes */
38
	char *endp;		/* end of string -- virtual NUL here */
39
	char *coldp;		/* can be no match starting before here */
40
	char **lastpos;		/* [nplus+1] */
41
	STATEVARS;
42
	states st;		/* current states */
43
	states fresh;		/* states for a fresh start */
44
	states tmp;		/* temporary */
45
	states empty;		/* empty set of states */
46
};
47
48
#include "engine.ih"
49
50
#ifdef REDEBUG
51
#define	SP(t, s, c)	print(m, t, s, c, stdout)
52
#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
53
#define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
54
#else
55
#define	SP(t, s, c)	/* nothing */
56
#define	AT(t, p1, p2, s1, s2)	/* nothing */
57
#define	NOTE(s)	/* nothing */
58
#endif
59
60
/*
61
 - matcher - the actual matching engine
62
 == static int matcher(register struct re_guts *g, char *string, \
63
 ==	size_t nmatch, regmatch_t pmatch[], int eflags);
64
 */
65
static int			/* 0 success, REG_NOMATCH failure */
66
matcher(charset,g, str, nmatch, pmatch, eflags)
67
CHARSET_INFO *charset;
68
register struct re_guts *g;
69
char *str;
70
size_t nmatch;
71
my_regmatch_t pmatch[];
72
int eflags;
73
{
74
	register char *endp;
75
	register uint i;
76
	struct match mv;
77
	register struct match *m = &mv;
78
	register char *dp;
79
	register const sopno gf = g->firststate+1;	/* +1 for OEND */
80
	register const sopno gl = g->laststate;
81
	char *start;
82
	char *stop;
83
84
	/* simplify the situation where possible */
85
	if (g->cflags&REG_NOSUB)
86
		nmatch = 0;
87
	if (eflags&REG_STARTEND) {
88
		start = str + pmatch[0].rm_so;
89
		stop = str + pmatch[0].rm_eo;
90
	} else {
91
		start = str;
92
		stop = start + strlen(start);
93
	}
94
	if (stop < start)
95
		return(REG_INVARG);
96
97
	/* prescreening; this does wonders for this rather slow code */
98
	if (g->must != NULL) {
99
		for (dp = start; dp < stop; dp++)
100
			if (*dp == g->must[0] && stop - dp >= g->mlen &&
101
				memcmp(dp, g->must, (size_t)g->mlen) == 0)
102
				break;
103
		if (dp == stop)		/* we didn't find g->must */
104
			return(REG_NOMATCH);
105
	}
106
107
	/* match struct setup */
108
	m->g = g;
109
	m->eflags = eflags;
110
	m->pmatch = NULL;
111
	m->lastpos = NULL;
112
	m->offp = str;
113
	m->beginp = start;
114
	m->endp = stop;
115
	STATESETUP(m, 4);
116
	SETUP(m->st);
117
	SETUP(m->fresh);
118
	SETUP(m->tmp);
119
	SETUP(m->empty);
120
	CLEAR(m->empty);
121
122
	/* this loop does only one repetition except for backrefs */
123
	for (;;) {
124
		endp = fast(charset, m, start, stop, gf, gl);
125
		if (endp == NULL) {		/* a miss */
126
		  if (m->pmatch != NULL)
127
		    free((char *)m->pmatch);
128
		  if (m->lastpos != NULL)
129
		    free((char *)m->lastpos);
130
		  STATETEARDOWN(m);
131
		  return(REG_NOMATCH);
132
		}
133
		if (nmatch == 0 && !g->backrefs)
134
			break;		/* no further info needed */
135
136
		/* where? */
137
		assert(m->coldp != NULL);
138
		for (;;) {
139
			NOTE("finding start");
140
			endp = slow(charset, m, m->coldp, stop, gf, gl);
141
			if (endp != NULL)
142
				break;
143
			assert(m->coldp < m->endp);
144
			m->coldp++;
145
		}
146
		if (nmatch == 1 && !g->backrefs)
147
			break;		/* no further info needed */
148
149
		/* oh my, he wants the subexpressions... */
150
		if (m->pmatch == NULL)
151
			m->pmatch = (my_regmatch_t *)malloc((m->g->nsub + 1) *
152
							sizeof(my_regmatch_t));
153
		if (m->pmatch == NULL) {
154
		  	if (m->lastpos != NULL)
155
		    	free((char *)m->lastpos);
156
			STATETEARDOWN(m);
157
			return(REG_ESPACE);
158
		}
159
		for (i = 1; i <= m->g->nsub; i++)
160
			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
161
		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
162
			NOTE("dissecting");
163
			dp = dissect(charset, m, m->coldp, endp, gf, gl);
164
		} else {
165
			if (g->nplus > 0 && m->lastpos == NULL)
166
				m->lastpos = (char **)malloc((g->nplus+1) *
167
							sizeof(char *));
168
			if (g->nplus > 0 && m->lastpos == NULL) {
169
				free(m->pmatch);
170
				STATETEARDOWN(m);
171
				return(REG_ESPACE);
172
			}
173
			NOTE("backref dissect");
174
			dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
175
		}
176
		if (dp != NULL)
177
			break;
178
179
		/* uh-oh... we couldn't find a subexpression-level match */
180
		assert(g->backrefs);	/* must be back references doing it */
181
		assert(g->nplus == 0 || m->lastpos != NULL);
182
		for (;;) {
183
			if (dp != NULL || endp <= m->coldp)
184
				break;		/* defeat */
185
			NOTE("backoff");
186
			endp = slow(charset, m, m->coldp, endp-1, gf, gl);
187
			if (endp == NULL)
188
				break;		/* defeat */
189
			/* try it on a shorter possibility */
190
#ifndef NDEBUG
191
			for (i = 1; i <= m->g->nsub; i++) {
192
				assert(m->pmatch[i].rm_so == -1);
193
				assert(m->pmatch[i].rm_eo == -1);
194
			}
195
#endif
196
			NOTE("backoff dissect");
197
			dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
198
		}
199
		assert(dp == NULL || dp == endp);
200
		if (dp != NULL)		/* found a shorter one */
201
			break;
202
203
		/* despite initial appearances, there is no match here */
204
		NOTE("false alarm");
205
		start = m->coldp + 1;	/* recycle starting later */
206
		assert(start <= stop);
207
	}
208
209
	/* fill in the details if requested */
210
	if (nmatch > 0) {
211
		pmatch[0].rm_so = m->coldp - m->offp;
212
		pmatch[0].rm_eo = endp - m->offp;
213
	}
214
	if (nmatch > 1) {
215
		assert(m->pmatch != NULL);
216
		for (i = 1; i < nmatch; i++)
217
			if (i <= m->g->nsub)
218
				pmatch[i] = m->pmatch[i];
219
			else {
220
				pmatch[i].rm_so = -1;
221
				pmatch[i].rm_eo = -1;
222
			}
223
	}
224
225
	if (m->pmatch != NULL)
226
		free((char *)m->pmatch);
227
	if (m->lastpos != NULL)
228
		free((char *)m->lastpos);
229
	STATETEARDOWN(m);
230
	return(0);
231
}
232
233
/*
234
 - dissect - figure out what matched what, no back references
235
 == static char *dissect(register struct match *m, char *start, \
236
 ==	char *stop, sopno startst, sopno stopst);
237
 */
238
static char *			/* == stop (success) always */
239
dissect(charset, m, start, stop, startst, stopst)
240
CHARSET_INFO *charset;
241
register struct match *m;
242
char *start;
243
char *stop;
244
sopno startst;
245
sopno stopst;
246
{
247
	register uint i;
248
	register sopno ss;	/* start sop of current subRE */
249
	register sopno es;	/* end sop of current subRE */
250
	register char *sp;	/* start of string matched by it */
251
	register char *stp;	/* string matched by it cannot pass here */
252
	register char *rest;	/* start of rest of string */
253
	register char *tail;	/* string unmatched by rest of RE */
254
	register sopno ssub;	/* start sop of subsubRE */
255
	register sopno esub;	/* end sop of subsubRE */
256
	register char *ssp;	/* start of string matched by subsubRE */
257
	register char *sep;	/* end of string matched by subsubRE */
258
	register char *oldssp;	/* previous ssp */
259
	register char *dp;      /* used in debug mode to check asserts */
260
261
	AT("diss", start, stop, startst, stopst);
262
	sp = start;
263
	for (ss = startst; ss < stopst; ss = es) {
264
		/* identify end of subRE */
265
		es = ss;
266
		switch (OP(m->g->strip[es])) {
267
		case OPLUS_:
268
		case OQUEST_:
269
			es += OPND(m->g->strip[es]);
270
			break;
271
		case OCH_:
272
			while (OP(m->g->strip[es]) != O_CH)
273
				es += OPND(m->g->strip[es]);
274
			break;
275
		}
276
		es++;
277
278
		/* figure out what it matched */
279
		switch (OP(m->g->strip[ss])) {
280
		case OEND:
281
			assert(nope);
282
			break;
283
		case OCHAR:
284
			sp++;
285
			break;
286
		case OBOL:
287
		case OEOL:
288
		case OBOW:
289
		case OEOW:
290
			break;
291
		case OANY:
292
		case OANYOF:
293
			sp++;
294
			break;
295
		case OBACK_:
296
		case O_BACK:
297
			assert(nope);
298
			break;
299
		/* cases where length of match is hard to find */
300
		case OQUEST_:
301
			stp = stop;
302
			for (;;) {
303
				/* how long could this one be? */
304
				rest = slow(charset, m, sp, stp, ss, es);
305
				assert(rest != NULL);	/* it did match */
306
				/* could the rest match the rest? */
307
				tail = slow(charset, m, rest, stop, es, stopst);
308
				if (tail == stop)
309
					break;		/* yes! */
310
				/* no -- try a shorter match for this one */
311
				stp = rest - 1;
312
				assert(stp >= sp);	/* it did work */
313
			}
314
			ssub = ss + 1;
315
			esub = es - 1;
316
			/* did innards match? */
317
			if (slow(charset, m, sp, rest, ssub, esub) != NULL) {
318
				dp = dissect(charset, m, sp, rest, ssub, esub);
319
				assert(dp == rest);
320
			} else		/* no */
321
				assert(sp == rest);
322
			sp = rest;
323
			break;
324
		case OPLUS_:
325
			stp = stop;
326
			for (;;) {
327
				/* how long could this one be? */
328
				rest = slow(charset, m, sp, stp, ss, es);
329
				assert(rest != NULL);	/* it did match */
330
				/* could the rest match the rest? */
331
				tail = slow(charset, m, rest, stop, es, stopst);
332
				if (tail == stop)
333
					break;		/* yes! */
334
				/* no -- try a shorter match for this one */
335
				stp = rest - 1;
336
				assert(stp >= sp);	/* it did work */
337
			}
338
			ssub = ss + 1;
339
			esub = es - 1;
340
			ssp = sp;
341
			oldssp = ssp;
342
			for (;;) {	/* find last match of innards */
343
				sep = slow(charset, m, ssp, rest, ssub, esub);
344
				if (sep == NULL || sep == ssp)
345
					break;	/* failed or matched null */
346
				oldssp = ssp;	/* on to next try */
347
				ssp = sep;
348
			}
349
			if (sep == NULL) {
350
				/* last successful match */
351
				sep = ssp;
352
				ssp = oldssp;
353
			}
354
			assert(sep == rest);	/* must exhaust substring */
355
			assert(slow(charset, m, ssp, sep, ssub, esub) == rest);
356
			dp = dissect(charset, m, ssp, sep, ssub, esub);
357
			assert(dp == sep);
358
			sp = rest;
359
			break;
360
		case OCH_:
361
			stp = stop;
362
			for (;;) {
363
				/* how long could this one be? */
364
				rest = slow(charset, m, sp, stp, ss, es);
365
				assert(rest != NULL);	/* it did match */
366
				/* could the rest match the rest? */
367
				tail = slow(charset, m, rest, stop, es, stopst);
368
				if (tail == stop)
369
					break;		/* yes! */
370
				/* no -- try a shorter match for this one */
371
				stp = rest - 1;
372
				assert(stp >= sp);	/* it did work */
373
			}
374
			ssub = ss + 1;
375
			esub = ss + OPND(m->g->strip[ss]) - 1;
376
			assert(OP(m->g->strip[esub]) == OOR1);
377
			for (;;) {	/* find first matching branch */
378
				if (slow(charset, m, sp, rest, ssub, esub) == rest)
379
					break;	/* it matched all of it */
380
				/* that one missed, try next one */
381
				assert(OP(m->g->strip[esub]) == OOR1);
382
				esub++;
383
				assert(OP(m->g->strip[esub]) == OOR2);
384
				ssub = esub + 1;
385
				esub += OPND(m->g->strip[esub]);
386
				if (OP(m->g->strip[esub]) == OOR2)
387
					esub--;
388
				else
389
					assert(OP(m->g->strip[esub]) == O_CH);
390
			}
391
			dp = dissect(charset, m, sp, rest, ssub, esub);
392
			assert(dp == rest);
393
			sp = rest;
394
			break;
395
		case O_PLUS:
396
		case O_QUEST:
397
		case OOR1:
398
		case OOR2:
399
		case O_CH:
400
			assert(nope);
401
			break;
402
		case OLPAREN:
403
			i = OPND(m->g->strip[ss]);
404
			assert(0 < i && i <= m->g->nsub);
405
			m->pmatch[i].rm_so = sp - m->offp;
406
			break;
407
		case ORPAREN:
408
			i = OPND(m->g->strip[ss]);
409
			assert(0 < i && i <= m->g->nsub);
410
			m->pmatch[i].rm_eo = sp - m->offp;
411
			break;
412
		default:		/* uh oh */
413
			assert(nope);
414
			break;
415
		}
416
	}
417
418
	assert(sp == stop);
419
	return(sp);
420
}
421
422
/*
423
 - backref - figure out what matched what, figuring in back references
424
 == static char *backref(register struct match *m, char *start, \
425
 ==	char *stop, sopno startst, sopno stopst, sopno lev);
426
 */
427
static char *			/* == stop (success) or NULL (failure) */
428
backref(charset,m, start, stop, startst, stopst, lev)
429
CHARSET_INFO *charset;
430
register struct match *m;
431
char *start;
432
char *stop;
433
sopno startst;
434
sopno stopst;
435
sopno lev;			/* PLUS nesting level */
436
{
437
	register uint i;
438
	register sopno ss;	/* start sop of current subRE */
439
	register char *sp;	/* start of string matched by it */
440
	register sopno ssub;	/* start sop of subsubRE */
441
	register sopno esub;	/* end sop of subsubRE */
442
	register char *ssp;	/* start of string matched by subsubRE */
443
	register char *dp;
444
	register size_t len;
445
	register int hard;
446
	register sop s;
447
	register regoff_t offsave;
448
	register cset *cs;
449
450
	AT("back", start, stop, startst, stopst);
451
	sp = start;
452
453
	/* get as far as we can with easy stuff */
454
	hard = 0;
455
	for (ss = startst; !hard && ss < stopst; ss++)
456
		switch (OP(s = m->g->strip[ss])) {
457
		case OCHAR:
458
			if (sp == stop || *sp++ != (char)OPND(s))
459
				return(NULL);
460
			break;
461
		case OANY:
462
			if (sp == stop)
463
				return(NULL);
464
			sp++;
465
			break;
466
		case OANYOF:
467
			cs = &m->g->sets[OPND(s)];
468
			if (sp == stop || !CHIN(cs, *sp++))
469
				return(NULL);
470
			break;
471
		case OBOL:
472
			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
473
					(sp < m->endp && *(sp-1) == '\n' &&
474
						(m->g->cflags&REG_NEWLINE)) )
475
				{ /* yes */ }
476
			else
477
				return(NULL);
478
			break;
479
		case OEOL:
480
			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
481
					(sp < m->endp && *sp == '\n' &&
482
						(m->g->cflags&REG_NEWLINE)) )
483
				{ /* yes */ }
484
			else
485
				return(NULL);
486
			break;
487
		case OBOW:
488
			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
489
					(sp < m->endp && *(sp-1) == '\n' &&
490
						(m->g->cflags&REG_NEWLINE)) ||
491
					(sp > m->beginp &&
492
							!ISWORD(charset,*(sp-1))) ) &&
493
					(sp < m->endp && ISWORD(charset,*sp)) )
494
				{ /* yes */ }
495
			else
496
				return(NULL);
497
			break;
498
		case OEOW:
499
			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
500
					(sp < m->endp && *sp == '\n' &&
501
						(m->g->cflags&REG_NEWLINE)) ||
502
					(sp < m->endp && !ISWORD(charset,*sp)) ) &&
503
					(sp > m->beginp && ISWORD(charset,*(sp-1))) )
504
				{ /* yes */ }
505
			else
506
				return(NULL);
507
			break;
508
		case O_QUEST:
509
			break;
510
		case OOR1:	/* matches null but needs to skip */
511
			ss++;
512
			s = m->g->strip[ss];
513
			do {
514
				assert(OP(s) == OOR2);
515
				ss += OPND(s);
516
			} while (OP(s = m->g->strip[ss]) != O_CH);
517
			/* note that the ss++ gets us past the O_CH */
518
			break;
519
		default:	/* have to make a choice */
520
			hard = 1;
521
			break;
522
		}
523
	if (!hard) {		/* that was it! */
524
		if (sp != stop)
525
			return(NULL);
526
		return(sp);
527
	}
528
	ss--;			/* adjust for the for's final increment */
529
530
	/* the hard stuff */
531
	AT("hard", sp, stop, ss, stopst);
532
	s = m->g->strip[ss];
533
	switch (OP(s)) {
534
	case OBACK_:		/* the vilest depths */
535
		i = OPND(s);
536
		assert(0 < i && i <= m->g->nsub);
537
		if (m->pmatch[i].rm_eo == -1)
538
			return(NULL);
539
		assert(m->pmatch[i].rm_so != -1);
540
		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
541
		assert((size_t) (stop - m->beginp) >= len);
542
		if (sp > stop - len)
543
			return(NULL);	/* not enough left to match */
544
		ssp = m->offp + m->pmatch[i].rm_so;
545
		if (memcmp(sp, ssp, len) != 0)
546
			return(NULL);
547
		while (m->g->strip[ss] != SOP(O_BACK, i))
548
			ss++;
549
		return(backref(charset, m, sp+len, stop, ss+1, stopst, lev));
550
		break;
551
	case OQUEST_:		/* to null or not */
552
		dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
553
		if (dp != NULL)
554
			return(dp);	/* not */
555
		return(backref(charset, m, sp, stop, ss+OPND(s)+1, stopst, lev));
556
		break;
557
	case OPLUS_:
558
		assert(m->lastpos != NULL);
559
		assert(lev+1 <= m->g->nplus);
560
		m->lastpos[lev+1] = sp;
561
		return(backref(charset, m, sp, stop, ss+1, stopst, lev+1));
562
		break;
563
	case O_PLUS:
564
		if (sp == m->lastpos[lev])	/* last pass matched null */
565
			return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
566
		/* try another pass */
567
		m->lastpos[lev] = sp;
568
		dp = backref(charset, m, sp, stop, ss-OPND(s)+1, stopst, lev);
569
		if (dp == NULL)
570
			return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
571
		else
572
			return(dp);
573
		break;
574
	case OCH_:		/* find the right one, if any */
575
		ssub = ss + 1;
576
		esub = ss + OPND(s) - 1;
577
		assert(OP(m->g->strip[esub]) == OOR1);
578
		for (;;) {	/* find first matching branch */
579
			dp = backref(charset, m, sp, stop, ssub, esub, lev);
580
			if (dp != NULL)
581
				return(dp);
582
			/* that one missed, try next one */
583
			if (OP(m->g->strip[esub]) == O_CH)
584
				return(NULL);	/* there is none */
585
			esub++;
586
			assert(OP(m->g->strip[esub]) == OOR2);
587
			ssub = esub + 1;
588
			esub += OPND(m->g->strip[esub]);
589
			if (OP(m->g->strip[esub]) == OOR2)
590
				esub--;
591
			else
592
				assert(OP(m->g->strip[esub]) == O_CH);
593
		}
594
		break;
595
	case OLPAREN:		/* must undo assignment if rest fails */
596
		i = OPND(s);
597
		assert(0 < i && i <= m->g->nsub);
598
		offsave = m->pmatch[i].rm_so;
599
		m->pmatch[i].rm_so = sp - m->offp;
600
		dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
601
		if (dp != NULL)
602
			return(dp);
603
		m->pmatch[i].rm_so = offsave;
604
		return(NULL);
605
		break;
606
	case ORPAREN:		/* must undo assignment if rest fails */
607
		i = OPND(s);
608
		assert(0 < i && i <= m->g->nsub);
609
		offsave = m->pmatch[i].rm_eo;
610
		m->pmatch[i].rm_eo = sp - m->offp;
611
		dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
612
		if (dp != NULL)
613
			return(dp);
614
		m->pmatch[i].rm_eo = offsave;
615
		return(NULL);
616
		break;
617
	default:		/* uh oh */
618
		assert(nope);
619
		break;
620
	}
621
622
	/* "can't happen" */
623
	assert(nope);
624
	/* NOTREACHED */
625
	return 0;				/* Keep gcc happy */
626
}
627
628
/*
629
 - fast - step through the string at top speed
630
 == static char *fast(register struct match *m, char *start, \
631
 ==	char *stop, sopno startst, sopno stopst);
632
 */
633
static char *			/* where tentative match ended, or NULL */
634
fast(charset, m, start, stop, startst, stopst)
635
CHARSET_INFO *charset;
636
register struct match *m;
637
char *start;
638
char *stop;
639
sopno startst;
640
sopno stopst;
641
{
642
	register states st = m->st;
643
	register states fresh = m->fresh;
644
	register states tmp = m->tmp;
645
	register char *p = start;
646
	register int c = (start == m->beginp) ? OUT : *(start-1);
647
	register int lastc;	/* previous c */
648
	register int flagch;
649
	register int i;
650
	register char *coldp;	/* last p after which no match was underway */
651
652
	CLEAR(st);
653
	SET1(st, startst);
654
	st = step(m->g, startst, stopst, st, NOTHING, st);
655
	ASSIGN(fresh, st);
656
	SP("start", st, *p);
657
	coldp = NULL;
658
	for (;;) {
659
		/* next character */
660
		lastc = c;
661
		c = (p == m->endp) ? OUT : *p;
662
		if (EQ(st, fresh))
663
			coldp = p;
664
665
		/* is there an EOL and/or BOL between lastc and c? */
666
		flagch = '\0';
667
		i = 0;
668
		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
669
				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
670
			flagch = BOL;
671
			i = m->g->nbol;
672
		}
673
		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
674
				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
675
			flagch = (flagch == BOL) ? BOLEOL : EOL;
676
			i += m->g->neol;
677
		}
678
		if (i != 0) {
679
			for (; i > 0; i--)
680
				st = step(m->g, startst, stopst, st, flagch, st);
681
			SP("boleol", st, c);
682
		}
683
684
		/* how about a word boundary? */
685
		if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
686
					(c != OUT && ISWORD(charset,c)) ) {
687
			flagch = BOW;
688
		}
689
		if ( (lastc != OUT && ISWORD(charset,lastc)) &&
690
				(flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
691
			flagch = EOW;
692
		}
693
		if (flagch == BOW || flagch == EOW) {
694
			st = step(m->g, startst, stopst, st, flagch, st);
695
			SP("boweow", st, c);
696
		}
697
698
		/* are we done? */
699
		if (ISSET(st, stopst) || p == stop)
700
			break;		/* NOTE BREAK OUT */
701
702
		/* no, we must deal with this character */
703
		ASSIGN(tmp, st);
704
		ASSIGN(st, fresh);
705
		assert(c != OUT);
706
		st = step(m->g, startst, stopst, tmp, c, st);
707
		SP("aft", st, c);
708
		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
709
		p++;
710
	}
711
712
	assert(coldp != NULL);
713
	m->coldp = coldp;
714
	if (ISSET(st, stopst))
715
		return(p+1);
716
	else
717
		return(NULL);
718
}
719
720
/*
721
 - slow - step through the string more deliberately
722
 == static char *slow(register struct match *m, char *start, \
723
 ==	char *stop, sopno startst, sopno stopst);
724
 */
725
static char *			/* where it ended */
726
slow(charset, m, start, stop, startst, stopst)
727
CHARSET_INFO *charset;
728
register struct match *m;
729
char *start;
730
char *stop;
731
sopno startst;
732
sopno stopst;
733
{
734
	register states st = m->st;
735
	register states empty = m->empty;
736
	register states tmp = m->tmp;
737
	register char *p = start;
738
	register int c = (start == m->beginp) ? OUT : *(start-1);
739
	register int lastc;	/* previous c */
740
	register int flagch;
741
	register int i;
742
	register char *matchp;	/* last p at which a match ended */
743
744
	AT("slow", start, stop, startst, stopst);
745
	CLEAR(st);
746
	SET1(st, startst);
747
	SP("sstart", st, *p);
748
	st = step(m->g, startst, stopst, st, NOTHING, st);
749
	matchp = NULL;
750
	for (;;) {
751
		/* next character */
752
		lastc = c;
753
		c = (p == m->endp) ? OUT : *p;
754
755
		/* is there an EOL and/or BOL between lastc and c? */
756
		flagch = '\0';
757
		i = 0;
758
		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
759
				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
760
			flagch = BOL;
761
			i = m->g->nbol;
762
		}
763
		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
764
				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
765
			flagch = (flagch == BOL) ? BOLEOL : EOL;
766
			i += m->g->neol;
767
		}
768
		if (i != 0) {
769
			for (; i > 0; i--)
770
				st = step(m->g, startst, stopst, st, flagch, st);
771
			SP("sboleol", st, c);
772
		}
773
774
		/* how about a word boundary? */
775
		if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
776
					(c != OUT && ISWORD(charset,c)) ) {
777
			flagch = BOW;
778
		}
779
		if ( (lastc != OUT && ISWORD(charset,lastc)) &&
780
				(flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
781
			flagch = EOW;
782
		}
783
		if (flagch == BOW || flagch == EOW) {
784
			st = step(m->g, startst, stopst, st, flagch, st);
785
			SP("sboweow", st, c);
786
		}
787
788
		/* are we done? */
789
		if (ISSET(st, stopst))
790
			matchp = p;
791
		if (EQ(st, empty) || p == stop)
792
			break;		/* NOTE BREAK OUT */
793
794
		/* no, we must deal with this character */
795
		ASSIGN(tmp, st);
796
		ASSIGN(st, empty);
797
		assert(c != OUT);
798
		st = step(m->g, startst, stopst, tmp, c, st);
799
		SP("saft", st, c);
800
		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
801
		p++;
802
	}
803
804
	return(matchp);
805
}
806
807
808
/*
809
 - step - map set of states reachable before char to set reachable after
810
 == static states step(register struct re_guts *g, sopno start, sopno stop, \
811
 ==	register states bef, int ch, register states aft);
812
 == #define	BOL	(OUT+1)
813
 == #define	EOL	(BOL+1)
814
 == #define	BOLEOL	(BOL+2)
815
 == #define	NOTHING	(BOL+3)
816
 == #define	BOW	(BOL+4)
817
 == #define	EOW	(BOL+5)
818
 == #define	CODEMAX	(BOL+5)		// highest code used
819
 == #define	NONCHAR(c)	((c) > CHAR_MAX)
820
 == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
821
 */
822
static states
823
step(g, start, stop, bef, ch, aft)
824
register struct re_guts *g;
825
sopno start;			/* start state within strip */
826
sopno stop;			/* state after stop state within strip */
827
register states bef;		/* states reachable before */
828
int ch;				/* character or NONCHAR code */
829
register states aft;		/* states already known reachable after */
830
{
831
	register cset *cs;
832
	register sop s;
833
	register sopno pc;
834
	register onestate here;		/* note, macros know this name */
835
	register sopno look;
836
	register onestate i;		/* Changed from int by Monty */
837
838
	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
839
		s = g->strip[pc];
840
		switch (OP(s)) {
841
		case OEND:
842
			assert(pc == stop-1);
843
			break;
844
		case OCHAR:
845
			/* only characters can match */
846
			assert(!NONCHAR(ch) || ch != (char)OPND(s));
847
			if (ch == (char)OPND(s))
848
				FWD(aft, bef, 1);
849
			break;
850
		case OBOL:
851
			if (ch == BOL || ch == BOLEOL)
852
				FWD(aft, bef, 1);
853
			break;
854
		case OEOL:
855
			if (ch == EOL || ch == BOLEOL)
856
				FWD(aft, bef, 1);
857
			break;
858
		case OBOW:
859
			if (ch == BOW)
860
				FWD(aft, bef, 1);
861
			break;
862
		case OEOW:
863
			if (ch == EOW)
864
				FWD(aft, bef, 1);
865
			break;
866
		case OANY:
867
			if (!NONCHAR(ch))
868
				FWD(aft, bef, 1);
869
			break;
870
		case OANYOF:
871
			cs = &g->sets[OPND(s)];
872
			if (!NONCHAR(ch) && CHIN(cs, ch))
873
				FWD(aft, bef, 1);
874
			break;
875
		case OBACK_:		/* ignored here */
876
		case O_BACK:
877
			FWD(aft, aft, 1);
878
			break;
879
		case OPLUS_:		/* forward, this is just an empty */
880
			FWD(aft, aft, 1);
881
			break;
882
		case O_PLUS:		/* both forward and back */
883
			FWD(aft, aft, 1);
884
			i = ISSETBACK(aft, OPND(s));
885
			BACK(aft, aft, OPND(s));
886
			if (!i && ISSETBACK(aft, OPND(s))) {
887
				/* oho, must reconsider loop body */
888
				pc -= OPND(s) + 1;
889
				INIT(here, pc);
890
			}
891
			break;
892
		case OQUEST_:		/* two branches, both forward */
893
			FWD(aft, aft, 1);
894
			FWD(aft, aft, OPND(s));
895
			break;
896
		case O_QUEST:		/* just an empty */
897
			FWD(aft, aft, 1);
898
			break;
899
		case OLPAREN:		/* not significant here */
900
		case ORPAREN:
901
			FWD(aft, aft, 1);
902
			break;
903
		case OCH_:		/* mark the first two branches */
904
			FWD(aft, aft, 1);
905
			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
906
			FWD(aft, aft, OPND(s));
907
			break;
908
		case OOR1:		/* done a branch, find the O_CH */
909
			if (ISSTATEIN(aft, here)) {
910
				for (look = 1;
911
						OP(s = g->strip[pc+look]) != O_CH;
912
						look += OPND(s))
913
					assert(OP(s) == OOR2);
914
				FWD(aft, aft, look);
915
			}
916
			break;
917
		case OOR2:		/* propagate OCH_'s marking */
918
			FWD(aft, aft, 1);
919
			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
920
				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
921
				FWD(aft, aft, OPND(s));
922
			}
923
			break;
924
		case O_CH:		/* just empty */
925
			FWD(aft, aft, 1);
926
			break;
927
		default:		/* ooooops... */
928
			assert(nope);
929
			break;
930
		}
931
	}
932
933
	return(aft);
934
}
935
936
#ifdef REDEBUG
937
/*
938
 - print - print a set of states
939
 == #ifdef REDEBUG
940
 == static void print(struct match *m, char *caption, states st, \
941
 ==	int ch, FILE *d);
942
 == #endif
943
 */
944
static void
945
print(m, caption, st, ch, d)
946
struct match *m;
947
char *caption;
948
states st;
949
int ch;
950
FILE *d;
951
{
952
	register struct re_guts *g = m->g;
953
	register int i;
954
	register int first = 1;
955
	char buf[10];
956
957
	if (!(m->eflags&REG_TRACE))
958
		return;
959
960
	fprintf(d, "%s", caption);
961
	if (ch != '\0')
962
		fprintf(d, " %s", printchar(ch,buf));
963
	for (i = 0; i < g->nstates; i++)
964
		if (ISSET(st, i)) {
965
			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
966
			first = 0;
967
		}
968
	fprintf(d, "\n");
969
}
970
971
/*
972
 - at - print current situation
973
 == #ifdef REDEBUG
974
 == static void at(struct match *m, char *title, char *start, char *stop, \
975
 ==						sopno startst, sopno stopst);
976
 == #endif
977
 */
978
static void
979
at(m, title, start, stop, startst, stopst)
980
struct match *m;
981
char *title;
982
char *start;
983
char *stop;
984
sopno startst;
985
sopno stopst;
986
{
987
	char buf[10];
988
	if (!(m->eflags&REG_TRACE))
989
		return;
990
991
	printf("%s %s-", title, printchar(*start,buf));
992
	printf("%s ", printchar(*stop,buf));
993
	printf("%ld-%ld\n", (long)startst, (long)stopst,buf);
994
}
995
996
#ifndef PCHARDONE
997
#define	PCHARDONE	/* never again */
998
/*
999
 - printchar - make a character printable
1000
 == #ifdef REDEBUG
1001
 == static char *printchar(int ch);
1002
 == #endif
1003
 *
1004
 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1005
 * duplicate here avoids having a debugging-capable regexec.o tied to
1006
 * a matching debug.o, and this is convenient.  It all disappears in
1007
 * the non-debug compilation anyway, so it doesn't matter much.
1008
 */
1009
static char *			/* -> representation */
1010
printchar(ch,pbuf)
1011
int ch;
1012
char *pbuf;
1013
{
1014
	if (isprint(ch) || ch == ' ')
1015
		sprintf(pbuf, "%c", ch);
1016
	else
1017
		sprintf(pbuf, "\\%o", ch);
1018
	return(pbuf);
1019
}
1020
#endif
1021
#endif
1022
1023
#undef	matcher
1024
#undef	fast
1025
#undef	slow
1026
#undef	dissect
1027
#undef	backref
1028
#undef	step
1029
#undef	print
1030
#undef	at
1031
#undef	match