1
by brian
clean slate |
1 |
#include <my_global.h> |
2 |
#include <m_string.h> |
|
3 |
#include <m_ctype.h> |
|
4 |
#ifdef __WIN__
|
|
5 |
#include <limits.h> |
|
6 |
#endif
|
|
7 |
||
8 |
#include "my_regex.h" |
|
9 |
#include "utils.h" |
|
10 |
#include "regex2.h" |
|
11 |
||
12 |
#include "cclass.h" |
|
13 |
#include "cname.h" |
|
14 |
||
15 |
/*
|
|
16 |
* parse structure, passed up and down to avoid global variables and
|
|
17 |
* other clumsinesses
|
|
18 |
*/
|
|
19 |
struct parse { |
|
20 |
char *next; /* next character in RE */ |
|
21 |
char *end; /* end of string (-> NUL normally) */ |
|
22 |
int error; /* has an error been seen? */ |
|
23 |
sop *strip; /* malloced strip */ |
|
24 |
sopno ssize; /* malloced strip size (allocated) */ |
|
25 |
sopno slen; /* malloced strip length (used) */ |
|
26 |
int ncsalloc; /* number of csets allocated */ |
|
27 |
struct re_guts *g; |
|
28 |
# define NPAREN 10 /* we need to remember () 1-9 for back refs */ |
|
29 |
sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ |
|
30 |
sopno pend[NPAREN]; /* -> ) ([0] unused) */ |
|
31 |
CHARSET_INFO *charset; /* for ctype things */ |
|
32 |
};
|
|
33 |
||
34 |
#include "regcomp.ih" |
|
35 |
||
36 |
static char nuls[10]; /* place to point scanner in event of error */ |
|
37 |
||
38 |
struct cclass cclasses[CCLASS_LAST+1]= { |
|
39 |
{ "alnum", "","", _MY_U | _MY_L | _MY_NMR}, |
|
40 |
{ "alpha", "","", _MY_U | _MY_L }, |
|
41 |
{ "blank", "","", _MY_B }, |
|
42 |
{ "cntrl", "","", _MY_CTR }, |
|
43 |
{ "digit", "","", _MY_NMR }, |
|
44 |
{ "graph", "","", _MY_PNT | _MY_U | _MY_L | _MY_NMR}, |
|
45 |
{ "lower", "","", _MY_L }, |
|
46 |
{ "print", "","", _MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B }, |
|
47 |
{ "punct", "","", _MY_PNT }, |
|
48 |
{ "space", "","", _MY_SPC }, |
|
49 |
{ "upper", "","", _MY_U }, |
|
50 |
{ "xdigit", "","", _MY_X }, |
|
51 |
{ NULL,NULL,NULL, 0 } |
|
52 |
};
|
|
53 |
||
54 |
/*
 * macros for use with parse structure
 * BEWARE:  these know that the parse structure is named `p' !!!
 *
 * Every macro argument is parenthesized in the expansion so that an
 * expression argument (e.g. ASTERN(op, a - b)) associates as written.
 */

/* inspecting the unread input */
#define	PEEK()	(*p->next)
#define	PEEK2()	(*(p->next+1))
#define	MORE()	(p->next < p->end)
#define	MORE2()	(p->next+1 < p->end)
#define	SEE(c)	(MORE() && PEEK() == (c))
#define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))

/* consuming input */
#define	NEXT()	(p->next++)
#define	NEXT2()	(p->next += 2)
#define	NEXTn(n)	(p->next += (n))
#define	GETNEXT()	(*p->next++)
#define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
#define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)

/* error reporting; each yields nonzero when the condition held */
#define	SETERROR(e)	seterr(p, (e))
#define	REQUIRE(co, e)	((co) || SETERROR(e))
#define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
#define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
#define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))

/* positions in the strip, relative to the used length */
#define	HERE()		(p->slen)
#define	THERE()		(p->slen - 1)
#define	THERETHERE()	(p->slen - 2)
#define	DROP(n)	(p->slen -= (n))

/* emitting and patching operators */
#define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
#define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, (pos))
#define	AHEAD(pos)		dofwd(p, (pos), HERE()-(pos))
#define	ASTERN(sop, pos)	EMIT(sop, HERE()-(pos))
|
|
83 |
||
84 |
/*
 * `never' is a constant-false value: a real variable when assertions are
 * compiled in, a plain macro when they are not.
 */
#ifdef NDEBUG
#define	never	0		/* some <assert.h>s have bugs too */
#else
static int never = 0;		/* for use in asserts; shuts lint up */
#endif
|
|
89 |
||
90 |
/*
|
|
91 |
- regcomp - interface for parser and compilation
|
|
92 |
= extern int regcomp(regex_t *, const char *, int);
|
|
93 |
= #define REG_BASIC 0000
|
|
94 |
= #define REG_EXTENDED 0001
|
|
95 |
= #define REG_ICASE 0002
|
|
96 |
= #define REG_NOSUB 0004
|
|
97 |
= #define REG_NEWLINE 0010
|
|
98 |
= #define REG_NOSPEC 0020
|
|
99 |
= #define REG_PEND 0040
|
|
100 |
= #define REG_DUMP 0200
|
|
101 |
*/
|
|
102 |
int /* 0 success, otherwise REG_something */ |
|
103 |
my_regcomp(preg, pattern, cflags, charset) |
|
104 |
my_regex_t *preg; |
|
105 |
const char *pattern; |
|
106 |
int cflags; |
|
107 |
CHARSET_INFO *charset; |
|
108 |
{
|
|
109 |
struct parse pa; |
|
110 |
register struct re_guts *g; |
|
111 |
register struct parse *p = &pa; |
|
112 |
register int i; |
|
113 |
register size_t len; |
|
114 |
#ifdef REDEBUG
|
|
115 |
# define GOODFLAGS(f) (f)
|
|
116 |
#else
|
|
117 |
# define GOODFLAGS(f) ((f)&~REG_DUMP)
|
|
118 |
#endif
|
|
119 |
||
120 |
my_regex_init(charset); /* Init cclass if neaded */ |
|
121 |
preg->charset=charset; |
|
122 |
cflags = GOODFLAGS(cflags); |
|
123 |
if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) |
|
124 |
return(REG_INVARG); |
|
125 |
||
126 |
if (cflags®_PEND) { |
|
127 |
if (preg->re_endp < pattern) |
|
128 |
return(REG_INVARG); |
|
129 |
len = preg->re_endp - pattern; |
|
130 |
} else |
|
131 |
len = strlen((char *)pattern); |
|
132 |
||
133 |
/* do the mallocs early so failure handling is easy */
|
|
134 |
g = (struct re_guts *)malloc(sizeof(struct re_guts) + |
|
135 |
(NC-1)*sizeof(cat_t)); |
|
136 |
if (g == NULL) |
|
137 |
return(REG_ESPACE); |
|
138 |
p->ssize = (long) (len/(size_t)2*(size_t)3 + (size_t)1); /* ugh */ |
|
139 |
p->strip = (sop *)malloc(p->ssize * sizeof(sop)); |
|
140 |
p->slen = 0; |
|
141 |
if (p->strip == NULL) { |
|
142 |
free((char *)g); |
|
143 |
return(REG_ESPACE); |
|
144 |
}
|
|
145 |
||
146 |
/* set things up */
|
|
147 |
p->g = g; |
|
148 |
p->next = (char *)pattern; /* convenience; we do not modify it */ |
|
149 |
p->end = p->next + len; |
|
150 |
p->error = 0; |
|
151 |
p->ncsalloc = 0; |
|
152 |
p->charset = preg->charset; |
|
153 |
for (i = 0; i < NPAREN; i++) { |
|
154 |
p->pbegin[i] = 0; |
|
155 |
p->pend[i] = 0; |
|
156 |
}
|
|
157 |
g->csetsize = NC; |
|
158 |
g->sets = NULL; |
|
159 |
g->setbits = NULL; |
|
160 |
g->ncsets = 0; |
|
161 |
g->cflags = cflags; |
|
162 |
g->iflags = 0; |
|
163 |
g->nbol = 0; |
|
164 |
g->neol = 0; |
|
165 |
g->must = NULL; |
|
166 |
g->mlen = 0; |
|
167 |
g->nsub = 0; |
|
168 |
g->ncategories = 1; /* category 0 is "everything else" */ |
|
169 |
g->categories = &g->catspace[-(CHAR_MIN)]; |
|
170 |
(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); |
|
171 |
g->backrefs = 0; |
|
172 |
||
173 |
/* do it */
|
|
174 |
EMIT(OEND, 0); |
|
175 |
g->firststate = THERE(); |
|
176 |
if (cflags®_EXTENDED) |
|
177 |
p_ere(p, OUT); |
|
178 |
else if (cflags®_NOSPEC) |
|
179 |
p_str(p); |
|
180 |
else
|
|
181 |
p_bre(p, OUT, OUT); |
|
182 |
EMIT(OEND, 0); |
|
183 |
g->laststate = THERE(); |
|
184 |
||
185 |
/* tidy up loose ends and fill things in */
|
|
186 |
categorize(p, g); |
|
187 |
stripsnug(p, g); |
|
188 |
findmust(p, g); |
|
189 |
g->nplus = pluscount(p, g); |
|
190 |
g->magic = MAGIC2; |
|
191 |
preg->re_nsub = g->nsub; |
|
192 |
preg->re_g = g; |
|
193 |
preg->re_magic = MAGIC1; |
|
194 |
#ifndef REDEBUG
|
|
195 |
/* not debugging, so can't rely on the assert() in regexec() */
|
|
196 |
if (g->iflags&BAD) |
|
197 |
SETERROR(REG_ASSERT); |
|
198 |
#endif
|
|
199 |
||
200 |
/* win or lose, we're done */
|
|
201 |
if (p->error != 0) /* lose */ |
|
202 |
my_regfree(preg); |
|
203 |
return(p->error); |
|
204 |
}
|
|
205 |
||
206 |
/*
|
|
207 |
- p_ere - ERE parser top level, concatenation and alternation
|
|
208 |
== static void p_ere(register struct parse *p, int stop);
|
|
209 |
*/
|
|
210 |
static void |
|
211 |
p_ere(p, stop) |
|
212 |
register struct parse *p; |
|
213 |
int stop; /* character this ERE should end at */ |
|
214 |
{
|
|
215 |
register char c; |
|
216 |
register sopno prevback; |
|
217 |
register sopno prevfwd; |
|
218 |
register sopno conc; |
|
219 |
register int first = 1; /* is this the first alternative? */ |
|
220 |
for (;;) { |
|
221 |
/* do a bunch of concatenated expressions */
|
|
222 |
conc = HERE(); |
|
223 |
while (MORE() && (c = PEEK()) != '|' && c != stop) |
|
224 |
p_ere_exp(p); |
|
225 |
if(REQUIRE(HERE() != conc, REG_EMPTY)) {}/* require nonempty */ |
|
226 |
||
227 |
if (!EAT('|')) |
|
228 |
break; /* NOTE BREAK OUT */ |
|
229 |
||
230 |
if (first) { |
|
231 |
INSERT(OCH_, conc); /* offset is wrong */ |
|
232 |
prevfwd = conc; |
|
233 |
prevback = conc; |
|
234 |
first = 0; |
|
235 |
}
|
|
236 |
ASTERN(OOR1, prevback); |
|
237 |
prevback = THERE(); |
|
238 |
AHEAD(prevfwd); /* fix previous offset */ |
|
239 |
prevfwd = HERE(); |
|
240 |
EMIT(OOR2, 0); /* offset is very wrong */ |
|
241 |
}
|
|
242 |
||
243 |
if (!first) { /* tail-end fixups */ |
|
244 |
AHEAD(prevfwd); |
|
245 |
ASTERN(O_CH, prevback); |
|
246 |
}
|
|
247 |
||
248 |
assert(!MORE() || SEE(stop)); |
|
249 |
}
|
|
250 |
||
251 |
/*
|
|
252 |
- p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
|
|
253 |
== static void p_ere_exp(register struct parse *p);
|
|
254 |
*/
|
|
255 |
static void |
|
256 |
p_ere_exp(p) |
|
257 |
register struct parse *p; |
|
258 |
{
|
|
259 |
register char c; |
|
260 |
register sopno pos; |
|
261 |
register int count; |
|
262 |
register int count2; |
|
263 |
register sopno subno; |
|
264 |
int wascaret = 0; |
|
265 |
||
266 |
assert(MORE()); /* caller should have ensured this */ |
|
267 |
c = GETNEXT(); |
|
268 |
||
269 |
pos = HERE(); |
|
270 |
switch (c) { |
|
271 |
case '(': |
|
272 |
if(REQUIRE(MORE(), REG_EPAREN)) {} |
|
273 |
p->g->nsub++; |
|
274 |
subno = (sopno) p->g->nsub; |
|
275 |
if (subno < NPAREN) |
|
276 |
p->pbegin[subno] = HERE(); |
|
277 |
EMIT(OLPAREN, subno); |
|
278 |
if (!SEE(')')) |
|
279 |
p_ere(p, ')'); |
|
280 |
if (subno < NPAREN) { |
|
281 |
p->pend[subno] = HERE(); |
|
282 |
assert(p->pend[subno] != 0); |
|
283 |
}
|
|
284 |
EMIT(ORPAREN, subno); |
|
285 |
if(MUSTEAT(')', REG_EPAREN)) {} |
|
286 |
break; |
|
287 |
#ifndef POSIX_MISTAKE
|
|
288 |
case ')': /* happens only if no current unmatched ( */ |
|
289 |
/*
|
|
290 |
* You may ask, why the ifndef? Because I didn't notice
|
|
291 |
* this until slightly too late for 1003.2, and none of the
|
|
292 |
* other 1003.2 regular-expression reviewers noticed it at
|
|
293 |
* all. So an unmatched ) is legal POSIX, at least until
|
|
294 |
* we can get it fixed.
|
|
295 |
*/
|
|
296 |
SETERROR(REG_EPAREN); |
|
297 |
break; |
|
298 |
#endif
|
|
299 |
case '^': |
|
300 |
EMIT(OBOL, 0); |
|
301 |
p->g->iflags |= USEBOL; |
|
302 |
p->g->nbol++; |
|
303 |
wascaret = 1; |
|
304 |
break; |
|
305 |
case '$': |
|
306 |
EMIT(OEOL, 0); |
|
307 |
p->g->iflags |= USEEOL; |
|
308 |
p->g->neol++; |
|
309 |
break; |
|
310 |
case '|': |
|
311 |
SETERROR(REG_EMPTY); |
|
312 |
break; |
|
313 |
case '*': |
|
314 |
case '+': |
|
315 |
case '?': |
|
316 |
SETERROR(REG_BADRPT); |
|
317 |
break; |
|
318 |
case '.': |
|
319 |
if (p->g->cflags®_NEWLINE) |
|
320 |
nonnewline(p); |
|
321 |
else
|
|
322 |
EMIT(OANY, 0); |
|
323 |
break; |
|
324 |
case '[': |
|
325 |
p_bracket(p); |
|
326 |
break; |
|
327 |
case '\\': |
|
328 |
if(REQUIRE(MORE(), REG_EESCAPE)) {} |
|
329 |
c = GETNEXT(); |
|
330 |
ordinary(p, c); |
|
331 |
break; |
|
332 |
case '{': /* okay as ordinary except if digit follows */ |
|
333 |
if(REQUIRE(!MORE() || !my_isdigit(p->charset,PEEK()), REG_BADRPT)) {} |
|
334 |
/* FALLTHROUGH */
|
|
335 |
default: |
|
336 |
ordinary(p, c); |
|
337 |
break; |
|
338 |
}
|
|
339 |
||
340 |
if (!MORE()) |
|
341 |
return; |
|
342 |
c = PEEK(); |
|
343 |
/* we call { a repetition if followed by a digit */
|
|
344 |
if (!( c == '*' || c == '+' || c == '?' || |
|
345 |
(c == '{' && MORE2() && |
|
346 |
my_isdigit(p->charset,PEEK2())) )) |
|
347 |
return; /* no repetition, we're done */ |
|
348 |
NEXT(); |
|
349 |
||
350 |
if(REQUIRE(!wascaret, REG_BADRPT)) {} |
|
351 |
switch (c) { |
|
352 |
case '*': /* implemented as +? */ |
|
353 |
/* this case does not require the (y|) trick, noKLUDGE */
|
|
354 |
INSERT(OPLUS_, pos); |
|
355 |
ASTERN(O_PLUS, pos); |
|
356 |
INSERT(OQUEST_, pos); |
|
357 |
ASTERN(O_QUEST, pos); |
|
358 |
break; |
|
359 |
case '+': |
|
360 |
INSERT(OPLUS_, pos); |
|
361 |
ASTERN(O_PLUS, pos); |
|
362 |
break; |
|
363 |
case '?': |
|
364 |
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
|
|
365 |
INSERT(OCH_, pos); /* offset slightly wrong */ |
|
366 |
ASTERN(OOR1, pos); /* this one's right */ |
|
367 |
AHEAD(pos); /* fix the OCH_ */ |
|
368 |
EMIT(OOR2, 0); /* offset very wrong... */ |
|
369 |
AHEAD(THERE()); /* ...so fix it */ |
|
370 |
ASTERN(O_CH, THERETHERE()); |
|
371 |
break; |
|
372 |
case '{': |
|
373 |
count = p_count(p); |
|
374 |
if (EAT(',')) { |
|
375 |
if (my_isdigit(p->charset,PEEK())) { |
|
376 |
count2 = p_count(p); |
|
377 |
if(REQUIRE(count <= count2, REG_BADBR)) {} |
|
378 |
} else /* single number with comma */ |
|
379 |
count2 = RE_INFINITY; |
|
380 |
} else /* just a single number */ |
|
381 |
count2 = count; |
|
382 |
repeat(p, pos, count, count2); |
|
383 |
if (!EAT('}')) { /* error heuristics */ |
|
384 |
while (MORE() && PEEK() != '}') |
|
385 |
NEXT(); |
|
386 |
if(REQUIRE(MORE(), REG_EBRACE)) {} |
|
387 |
SETERROR(REG_BADBR); |
|
388 |
}
|
|
389 |
break; |
|
390 |
}
|
|
391 |
||
392 |
if (!MORE()) |
|
393 |
return; |
|
394 |
c = PEEK(); |
|
395 |
if (!( c == '*' || c == '+' || c == '?' || |
|
396 |
(c == '{' && MORE2() && |
|
397 |
my_isdigit(p->charset,PEEK2())) ) ) |
|
398 |
return; |
|
399 |
SETERROR(REG_BADRPT); |
|
400 |
}
|
|
401 |
||
402 |
/*
|
|
403 |
- p_str - string (no metacharacters) "parser"
|
|
404 |
== static void p_str(register struct parse *p);
|
|
405 |
*/
|
|
406 |
static void |
|
407 |
p_str(p) |
|
408 |
register struct parse *p; |
|
409 |
{
|
|
410 |
if(REQUIRE(MORE(), REG_EMPTY)) {} |
|
411 |
while (MORE()) |
|
412 |
ordinary(p, GETNEXT()); |
|
413 |
}
|
|
414 |
||
415 |
/*
|
|
416 |
- p_bre - BRE parser top level, anchoring and concatenation
|
|
417 |
== static void p_bre(register struct parse *p, register int end1, \
|
|
418 |
== register int end2);
|
|
419 |
* Giving end1 as OUT essentially eliminates the end1/end2 check.
|
|
420 |
*
|
|
421 |
* This implementation is a bit of a kludge, in that a trailing $ is first
|
|
422 |
* taken as an ordinary character and then revised to be an anchor. The
|
|
423 |
* only undesirable side effect is that '$' gets included as a character
|
|
424 |
* category in such cases. This is fairly harmless; not worth fixing.
|
|
425 |
* The amount of lookahead needed to avoid this kludge is excessive.
|
|
426 |
*/
|
|
427 |
static void |
|
428 |
p_bre(p, end1, end2) |
|
429 |
register struct parse *p; |
|
430 |
register int end1; /* first terminating character */ |
|
431 |
register int end2; /* second terminating character */ |
|
432 |
{
|
|
433 |
register sopno start = HERE(); |
|
434 |
register int first = 1; /* first subexpression? */ |
|
435 |
register int wasdollar = 0; |
|
436 |
||
437 |
if (EAT('^')) { |
|
438 |
EMIT(OBOL, 0); |
|
439 |
p->g->iflags |= USEBOL; |
|
440 |
p->g->nbol++; |
|
441 |
}
|
|
442 |
while (MORE() && !SEETWO(end1, end2)) { |
|
443 |
wasdollar = p_simp_re(p, first); |
|
444 |
first = 0; |
|
445 |
}
|
|
446 |
if (wasdollar) { /* oops, that was a trailing anchor */ |
|
447 |
DROP(1); |
|
448 |
EMIT(OEOL, 0); |
|
449 |
p->g->iflags |= USEEOL; |
|
450 |
p->g->neol++; |
|
451 |
}
|
|
452 |
||
453 |
if(REQUIRE(HERE() != start, REG_EMPTY)) {} /* require nonempty */ |
|
454 |
}
|
|
455 |
||
456 |
/*
|
|
457 |
- p_simp_re - parse a simple RE, an atom possibly followed by a repetition
|
|
458 |
== static int p_simp_re(register struct parse *p, int starordinary);
|
|
459 |
*/
|
|
460 |
static int /* was the simple RE an unbackslashed $? */ |
|
461 |
p_simp_re(p, starordinary) |
|
462 |
register struct parse *p; |
|
463 |
int starordinary; /* is a leading * an ordinary character? */ |
|
464 |
{
|
|
465 |
register int c; |
|
466 |
register int count; |
|
467 |
register int count2; |
|
468 |
register sopno pos; |
|
469 |
register int i; |
|
470 |
register sopno subno; |
|
471 |
# define BACKSL (1<<CHAR_BIT)
|
|
472 |
||
473 |
pos = HERE(); /* repetion op, if any, covers from here */ |
|
474 |
||
475 |
assert(MORE()); /* caller should have ensured this */ |
|
476 |
c = GETNEXT(); |
|
477 |
if (c == '\\') { |
|
478 |
if(REQUIRE(MORE(), REG_EESCAPE)) {} |
|
479 |
c = BACKSL | (unsigned char)GETNEXT(); |
|
480 |
}
|
|
481 |
switch (c) { |
|
482 |
case '.': |
|
483 |
if (p->g->cflags®_NEWLINE) |
|
484 |
nonnewline(p); |
|
485 |
else
|
|
486 |
EMIT(OANY, 0); |
|
487 |
break; |
|
488 |
case '[': |
|
489 |
p_bracket(p); |
|
490 |
break; |
|
491 |
case BACKSL|'{': |
|
492 |
SETERROR(REG_BADRPT); |
|
493 |
break; |
|
494 |
case BACKSL|'(': |
|
495 |
p->g->nsub++; |
|
496 |
subno = (sopno) p->g->nsub; |
|
497 |
if (subno < NPAREN) |
|
498 |
p->pbegin[subno] = HERE(); |
|
499 |
EMIT(OLPAREN, subno); |
|
500 |
/* the MORE here is an error heuristic */
|
|
501 |
if (MORE() && !SEETWO('\\', ')')) |
|
502 |
p_bre(p, '\\', ')'); |
|
503 |
if (subno < NPAREN) { |
|
504 |
p->pend[subno] = HERE(); |
|
505 |
assert(p->pend[subno] != 0); |
|
506 |
}
|
|
507 |
EMIT(ORPAREN, subno); |
|
508 |
if(REQUIRE(EATTWO('\\', ')'), REG_EPAREN)) {} |
|
509 |
break; |
|
510 |
case BACKSL|')': /* should not get here -- must be user */ |
|
511 |
case BACKSL|'}': |
|
512 |
SETERROR(REG_EPAREN); |
|
513 |
break; |
|
514 |
case BACKSL|'1': |
|
515 |
case BACKSL|'2': |
|
516 |
case BACKSL|'3': |
|
517 |
case BACKSL|'4': |
|
518 |
case BACKSL|'5': |
|
519 |
case BACKSL|'6': |
|
520 |
case BACKSL|'7': |
|
521 |
case BACKSL|'8': |
|
522 |
case BACKSL|'9': |
|
523 |
i = (c&~BACKSL) - '0'; |
|
524 |
assert(i < NPAREN); |
|
525 |
if (p->pend[i] != 0) { |
|
526 |
assert((uint) i <= p->g->nsub); |
|
527 |
EMIT(OBACK_, i); |
|
528 |
assert(p->pbegin[i] != 0); |
|
529 |
assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); |
|
530 |
assert(OP(p->strip[p->pend[i]]) == ORPAREN); |
|
531 |
(void) dupl(p, p->pbegin[i]+1, p->pend[i]); |
|
532 |
EMIT(O_BACK, i); |
|
533 |
} else |
|
534 |
SETERROR(REG_ESUBREG); |
|
535 |
p->g->backrefs = 1; |
|
536 |
break; |
|
537 |
case '*': |
|
538 |
if(REQUIRE(starordinary, REG_BADRPT)) {} |
|
539 |
/* FALLTHROUGH */
|
|
540 |
default: |
|
541 |
ordinary(p, c &~ BACKSL); |
|
542 |
break; |
|
543 |
}
|
|
544 |
||
545 |
if (EAT('*')) { /* implemented as +? */ |
|
546 |
/* this case does not require the (y|) trick, noKLUDGE */
|
|
547 |
INSERT(OPLUS_, pos); |
|
548 |
ASTERN(O_PLUS, pos); |
|
549 |
INSERT(OQUEST_, pos); |
|
550 |
ASTERN(O_QUEST, pos); |
|
551 |
} else if (EATTWO('\\', '{')) { |
|
552 |
count = p_count(p); |
|
553 |
if (EAT(',')) { |
|
554 |
if (MORE() && my_isdigit(p->charset,PEEK())) { |
|
555 |
count2 = p_count(p); |
|
556 |
if(REQUIRE(count <= count2, REG_BADBR)) {} |
|
557 |
} else /* single number with comma */ |
|
558 |
count2 = RE_INFINITY; |
|
559 |
} else /* just a single number */ |
|
560 |
count2 = count; |
|
561 |
repeat(p, pos, count, count2); |
|
562 |
if (!EATTWO('\\', '}')) { /* error heuristics */ |
|
563 |
while (MORE() && !SEETWO('\\', '}')) |
|
564 |
NEXT(); |
|
565 |
if(REQUIRE(MORE(), REG_EBRACE)) {} |
|
566 |
SETERROR(REG_BADBR); |
|
567 |
}
|
|
568 |
} else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ |
|
569 |
return(1); |
|
570 |
||
571 |
return(0); |
|
572 |
}
|
|
573 |
||
574 |
/*
|
|
575 |
- p_count - parse a repetition count
|
|
576 |
== static int p_count(register struct parse *p);
|
|
577 |
*/
|
|
578 |
static int /* the value */ |
|
579 |
p_count(p) |
|
580 |
register struct parse *p; |
|
581 |
{
|
|
582 |
register int count = 0; |
|
583 |
register int ndigits = 0; |
|
584 |
||
585 |
while (MORE() && my_isdigit(p->charset,PEEK()) && count <= DUPMAX) { |
|
586 |
count = count*10 + (GETNEXT() - '0'); |
|
587 |
ndigits++; |
|
588 |
}
|
|
589 |
||
590 |
if(REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR)) {} |
|
591 |
return(count); |
|
592 |
}
|
|
593 |
||
594 |
/*
|
|
595 |
- p_bracket - parse a bracketed character list
|
|
596 |
== static void p_bracket(register struct parse *p);
|
|
597 |
*
|
|
598 |
* Note a significant property of this code: if the allocset() did SETERROR,
|
|
599 |
* no set operations are done.
|
|
600 |
*/
|
|
601 |
static void |
|
602 |
p_bracket(p) |
|
603 |
register struct parse *p; |
|
604 |
{
|
|
605 |
register cset *cs = allocset(p); |
|
606 |
register int invert = 0; |
|
607 |
||
608 |
/* Dept of Truly Sickening Special-Case Kludges */
|
|
609 |
if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { |
|
610 |
EMIT(OBOW, 0); |
|
611 |
NEXTn(6); |
|
612 |
return; |
|
613 |
}
|
|
614 |
if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { |
|
615 |
EMIT(OEOW, 0); |
|
616 |
NEXTn(6); |
|
617 |
return; |
|
618 |
}
|
|
619 |
||
620 |
if (EAT('^')) |
|
621 |
invert++; /* make note to invert set at end */ |
|
622 |
if (EAT(']')) |
|
623 |
CHadd(cs, ']'); |
|
624 |
else if (EAT('-')) |
|
625 |
CHadd(cs, '-'); |
|
626 |
while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) |
|
627 |
p_b_term(p, cs); |
|
628 |
if (EAT('-')) |
|
629 |
CHadd(cs, '-'); |
|
630 |
if(MUSTEAT(']', REG_EBRACK)) {} |
|
631 |
||
632 |
if (p->error != 0) /* don't mess things up further */ |
|
633 |
return; |
|
634 |
||
635 |
if (p->g->cflags®_ICASE) { |
|
636 |
register int i; |
|
637 |
register int ci; |
|
638 |
||
639 |
for (i = p->g->csetsize - 1; i >= 0; i--) |
|
640 |
if (CHIN(cs, i) && my_isalpha(p->charset,i)) { |
|
641 |
ci = othercase(p->charset,i); |
|
642 |
if (ci != i) |
|
643 |
CHadd(cs, ci); |
|
644 |
}
|
|
645 |
if (cs->multis != NULL) |
|
646 |
mccase(p, cs); |
|
647 |
}
|
|
648 |
if (invert) { |
|
649 |
register int i; |
|
650 |
||
651 |
for (i = p->g->csetsize - 1; i >= 0; i--) |
|
652 |
if (CHIN(cs, i)) |
|
653 |
CHsub(cs, i); |
|
654 |
else
|
|
655 |
CHadd(cs, i); |
|
656 |
if (p->g->cflags®_NEWLINE) |
|
657 |
CHsub(cs, '\n'); |
|
658 |
if (cs->multis != NULL) |
|
659 |
mcinvert(p, cs); |
|
660 |
}
|
|
661 |
||
662 |
assert(cs->multis == NULL); /* xxx */ |
|
663 |
||
664 |
if (nch(p, cs) == 1) { /* optimize singleton sets */ |
|
665 |
ordinary(p, firstch(p, cs)); |
|
666 |
freeset(p, cs); |
|
667 |
} else |
|
668 |
EMIT(OANYOF, freezeset(p, cs)); |
|
669 |
}
|
|
670 |
||
671 |
/*
|
|
672 |
- p_b_term - parse one term of a bracketed character list
|
|
673 |
== static void p_b_term(register struct parse *p, register cset *cs);
|
|
674 |
*/
|
|
675 |
static void |
|
676 |
p_b_term(p, cs) |
|
677 |
register struct parse *p; |
|
678 |
register cset *cs; |
|
679 |
{
|
|
680 |
register char c; |
|
681 |
register char start, finish; |
|
682 |
register int i; |
|
683 |
||
684 |
/* classify what we've got */
|
|
685 |
switch ((MORE()) ? PEEK() : '\0') { |
|
686 |
case '[': |
|
687 |
c = (MORE2()) ? PEEK2() : '\0'; |
|
688 |
break; |
|
689 |
case '-': |
|
690 |
SETERROR(REG_ERANGE); |
|
691 |
return; /* NOTE RETURN */ |
|
692 |
break; |
|
693 |
default: |
|
694 |
c = '\0'; |
|
695 |
break; |
|
696 |
}
|
|
697 |
||
698 |
switch (c) { |
|
699 |
case ':': /* character class */ |
|
700 |
NEXT2(); |
|
701 |
if(REQUIRE(MORE(), REG_EBRACK)) {} |
|
702 |
c = PEEK(); |
|
703 |
if(REQUIRE(c != '-' && c != ']', REG_ECTYPE)) {} |
|
704 |
p_b_cclass(p, cs); |
|
705 |
if(REQUIRE(MORE(), REG_EBRACK)) {} |
|
706 |
if(REQUIRE(EATTWO(':', ']'), REG_ECTYPE)) {} |
|
707 |
break; |
|
708 |
case '=': /* equivalence class */ |
|
709 |
NEXT2(); |
|
710 |
if(REQUIRE(MORE(), REG_EBRACK)) {} |
|
711 |
c = PEEK(); |
|
712 |
if(REQUIRE(c != '-' && c != ']', REG_ECOLLATE)) {} |
|
713 |
p_b_eclass(p, cs); |
|
714 |
if(REQUIRE(MORE(), REG_EBRACK)) {} |
|
715 |
if(REQUIRE(EATTWO('=', ']'), REG_ECOLLATE)) {} |
|
716 |
break; |
|
717 |
default: /* symbol, ordinary character, or range */ |
|
718 |
/* xxx revision needed for multichar stuff */
|
|
719 |
start = p_b_symbol(p); |
|
720 |
if (SEE('-') && MORE2() && PEEK2() != ']') { |
|
721 |
/* range */
|
|
722 |
NEXT(); |
|
723 |
if (EAT('-')) |
|
724 |
finish = '-'; |
|
725 |
else
|
|
726 |
finish = p_b_symbol(p); |
|
727 |
} else |
|
728 |
finish = start; |
|
729 |
/* xxx what about signed chars here... */
|
|
730 |
if(REQUIRE(start <= finish, REG_ERANGE)) {} |
|
731 |
for (i = start; i <= finish; i++) |
|
732 |
CHadd(cs, i); |
|
733 |
break; |
|
734 |
}
|
|
735 |
}
|
|
736 |
||
737 |
/*
|
|
738 |
- p_b_cclass - parse a character-class name and deal with it
|
|
739 |
== static void p_b_cclass(register struct parse *p, register cset *cs);
|
|
740 |
*/
|
|
741 |
static void |
|
742 |
p_b_cclass(p, cs) |
|
743 |
register struct parse *p; |
|
744 |
register cset *cs; |
|
745 |
{
|
|
746 |
register char *sp = p->next; |
|
747 |
register struct cclass *cp; |
|
748 |
register size_t len; |
|
749 |
||
750 |
while (MORE() && my_isalpha(p->charset,PEEK())) |
|
751 |
NEXT(); |
|
752 |
len = p->next - sp; |
|
753 |
for (cp = cclasses; cp->name != NULL; cp++) |
|
754 |
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') |
|
755 |
break; |
|
756 |
if (cp->name == NULL) { |
|
757 |
/* oops, didn't find it */
|
|
758 |
SETERROR(REG_ECTYPE); |
|
759 |
return; |
|
760 |
}
|
|
761 |
||
762 |
#ifndef USE_ORIG_REGEX_CODE
|
|
763 |
{
|
|
764 |
register size_t i; |
|
765 |
for (i=1 ; i<256 ; i++) |
|
766 |
if (p->charset->ctype[i+1] & cp->mask) |
|
767 |
CHadd(cs, i); |
|
768 |
}
|
|
769 |
#else
|
|
770 |
{
|
|
771 |
register char *u = (char*) cp->chars; |
|
772 |
register char c; |
|
773 |
||
774 |
while ((c = *u++) != '\0') |
|
775 |
CHadd(cs, c); |
|
776 |
||
777 |
for (u = (char*) cp->multis; *u != '\0'; u += strlen(u) + 1) |
|
778 |
MCadd(p, cs, u); |
|
779 |
}
|
|
780 |
#endif
|
|
781 |
||
782 |
}
|
|
783 |
||
784 |
/*
|
|
785 |
- p_b_eclass - parse an equivalence-class name and deal with it
|
|
786 |
== static void p_b_eclass(register struct parse *p, register cset *cs);
|
|
787 |
*
|
|
788 |
* This implementation is incomplete. xxx
|
|
789 |
*/
|
|
790 |
static void |
|
791 |
p_b_eclass(p, cs) |
|
792 |
register struct parse *p; |
|
793 |
register cset *cs; |
|
794 |
{
|
|
795 |
register char c; |
|
796 |
||
797 |
c = p_b_coll_elem(p, '='); |
|
798 |
CHadd(cs, c); |
|
799 |
}
|
|
800 |
||
801 |
/*
|
|
802 |
- p_b_symbol - parse a character or [..]ed multicharacter collating symbol
|
|
803 |
== static char p_b_symbol(register struct parse *p);
|
|
804 |
*/
|
|
805 |
static char /* value of symbol */ |
|
806 |
p_b_symbol(p) |
|
807 |
register struct parse *p; |
|
808 |
{
|
|
809 |
register char value; |
|
810 |
||
811 |
if(REQUIRE(MORE(), REG_EBRACK)) {} |
|
812 |
if (!EATTWO('[', '.')) |
|
813 |
return(GETNEXT()); |
|
814 |
||
815 |
/* collating symbol */
|
|
816 |
value = p_b_coll_elem(p, '.'); |
|
817 |
if(REQUIRE(EATTWO('.', ']'), REG_ECOLLATE)) {} |
|
818 |
return(value); |
|
819 |
}
|
|
820 |
||
821 |
/*
|
|
822 |
- p_b_coll_elem - parse a collating-element name and look it up
|
|
823 |
== static char p_b_coll_elem(register struct parse *p, int endc);
|
|
824 |
*/
|
|
825 |
static char /* value of collating element */ |
|
826 |
p_b_coll_elem(p, endc) |
|
827 |
register struct parse *p; |
|
828 |
int endc; /* name ended by endc,']' */ |
|
829 |
{
|
|
830 |
register char *sp = p->next; |
|
831 |
register struct cname *cp; |
|
832 |
#ifdef _WIN64
|
|
833 |
register __int64 len; |
|
834 |
#else
|
|
835 |
register int len; |
|
836 |
#endif
|
|
837 |
while (MORE() && !SEETWO(endc, ']')) |
|
838 |
NEXT(); |
|
839 |
if (!MORE()) { |
|
840 |
SETERROR(REG_EBRACK); |
|
841 |
return(0); |
|
842 |
}
|
|
843 |
len = p->next - sp; |
|
844 |
for (cp = cnames; cp->name != NULL; cp++) |
|
845 |
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') |
|
846 |
return(cp->code); /* known name */ |
|
847 |
if (len == 1) |
|
848 |
return(*sp); /* single character */ |
|
849 |
SETERROR(REG_ECOLLATE); /* neither */ |
|
850 |
return(0); |
|
851 |
}
|
|
852 |
||
853 |
/*
|
|
854 |
- othercase - return the case counterpart of an alphabetic
|
|
855 |
== static char othercase(int ch);
|
|
856 |
*/
|
|
857 |
static char /* if no counterpart, return ch */ |
|
858 |
othercase(charset,ch) |
|
859 |
CHARSET_INFO *charset; |
|
860 |
int ch; |
|
861 |
{
|
|
862 |
/*
|
|
863 |
In MySQL some multi-byte character sets
|
|
864 |
have 'ctype' array but don't have 'to_lower'
|
|
865 |
and 'to_upper' arrays. In this case we handle
|
|
866 |
only basic latin letters a..z and A..Z.
|
|
867 |
|
|
868 |
If 'to_lower' and 'to_upper' arrays are empty in a character set,
|
|
869 |
then my_isalpha(cs, ch) should never return TRUE for characters
|
|
870 |
other than basic latin letters. Otherwise it should be
|
|
871 |
considered as a mistake in character set definition.
|
|
872 |
*/
|
|
873 |
assert(my_isalpha(charset,ch)); |
|
874 |
if (my_isupper(charset,ch)) |
|
875 |
{
|
|
876 |
return(charset->to_lower ? my_tolower(charset,ch) : |
|
877 |
ch - 'A' + 'a'); |
|
878 |
}
|
|
879 |
else if (my_islower(charset,ch)) |
|
880 |
{
|
|
881 |
return(charset->to_upper ? my_toupper(charset,ch) : |
|
882 |
ch - 'a' + 'A'); |
|
883 |
}
|
|
884 |
else /* peculiar, but could happen */ |
|
885 |
return(ch); |
|
886 |
}
|
|
887 |
||
888 |
/*
|
|
889 |
- bothcases - emit a dualcase version of a two-case character
|
|
890 |
== static void bothcases(register struct parse *p, int ch);
|
|
891 |
*
|
|
892 |
* Boy, is this implementation ever a kludge...
|
|
893 |
*/
|
|
894 |
static void |
|
895 |
bothcases(p, ch) |
|
896 |
register struct parse *p; |
|
897 |
int ch; |
|
898 |
{
|
|
899 |
register char *oldnext = p->next; |
|
900 |
register char *oldend = p->end; |
|
901 |
char bracket[3]; |
|
902 |
||
903 |
assert(othercase(p->charset, ch) != ch); /* p_bracket() would recurse */ |
|
904 |
p->next = bracket; |
|
905 |
p->end = bracket+2; |
|
906 |
bracket[0] = ch; |
|
907 |
bracket[1] = ']'; |
|
908 |
bracket[2] = '\0'; |
|
909 |
p_bracket(p); |
|
910 |
assert(p->next == bracket+2); |
|
911 |
p->next = oldnext; |
|
912 |
p->end = oldend; |
|
913 |
}
|
|
914 |
||
915 |
/*
|
|
916 |
- ordinary - emit an ordinary character
|
|
917 |
== static void ordinary(register struct parse *p, register int ch);
|
|
918 |
*/
|
|
919 |
static void |
|
920 |
ordinary(p, ch) |
|
921 |
register struct parse *p; |
|
922 |
register int ch; |
|
923 |
{
|
|
924 |
register cat_t *cap = p->g->categories; |
|
925 |
||
926 |
if ((p->g->cflags®_ICASE) && my_isalpha(p->charset,ch) && |
|
927 |
othercase(p->charset,ch) != ch) |
|
928 |
bothcases(p, ch); |
|
929 |
else { |
|
930 |
EMIT(OCHAR, (unsigned char)ch); |
|
931 |
if (cap[ch] == 0) |
|
932 |
cap[ch] = p->g->ncategories++; |
|
933 |
}
|
|
934 |
}
|
|
935 |
||
936 |
/*
|
|
937 |
- nonnewline - emit REG_NEWLINE version of OANY
|
|
938 |
== static void nonnewline(register struct parse *p);
|
|
939 |
*
|
|
940 |
* Boy, is this implementation ever a kludge...
|
|
941 |
*/
|
|
942 |
static void |
|
943 |
nonnewline(p) |
|
944 |
register struct parse *p; |
|
945 |
{
|
|
946 |
register char *oldnext = p->next; |
|
947 |
register char *oldend = p->end; |
|
948 |
char bracket[4]; |
|
949 |
||
950 |
p->next = bracket; |
|
951 |
p->end = bracket+3; |
|
952 |
bracket[0] = '^'; |
|
953 |
bracket[1] = '\n'; |
|
954 |
bracket[2] = ']'; |
|
955 |
bracket[3] = '\0'; |
|
956 |
p_bracket(p); |
|
957 |
assert(p->next == bracket+3); |
|
958 |
p->next = oldnext; |
|
959 |
p->end = oldend; |
|
960 |
}
|
|
961 |
||
962 |
/*
|
|
963 |
- repeat - generate code for a bounded repetition, recursively if needed
|
|
964 |
== static void repeat(register struct parse *p, sopno start, int from, int to);
|
|
965 |
*/
|
|
966 |
static void |
|
967 |
repeat(p, start, from, to) |
|
968 |
register struct parse *p; |
|
969 |
sopno start; /* operand from here to end of strip */ |
|
970 |
int from; /* repeated from this number */ |
|
971 |
int to; /* to this number of times (maybe RE_INFINITY) */ |
|
972 |
{
|
|
973 |
register sopno finish = HERE(); |
|
974 |
# define N 2
|
|
975 |
# define INF 3
|
|
976 |
# define REP(f, t) ((f)*8 + (t))
|
|
977 |
# define MAP(n) (((n) <= 1) ? (n) : ((n) == RE_INFINITY) ? INF : N)
|
|
978 |
register sopno copy; |
|
979 |
||
980 |
if (p->error != 0) /* head off possible runaway recursion */ |
|
981 |
return; |
|
982 |
||
983 |
assert(from <= to); |
|
984 |
||
985 |
switch (REP(MAP(from), MAP(to))) { |
|
986 |
case REP(0, 0): /* must be user doing this */ |
|
987 |
DROP(finish-start); /* drop the operand */ |
|
988 |
break; |
|
989 |
case REP(0, 1): /* as x{1,1}? */ |
|
990 |
case REP(0, N): /* as x{1,n}? */ |
|
991 |
case REP(0, INF): /* as x{1,}? */ |
|
992 |
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
|
|
993 |
INSERT(OCH_, start); /* offset is wrong... */ |
|
994 |
repeat(p, start+1, 1, to); |
|
995 |
ASTERN(OOR1, start); |
|
996 |
AHEAD(start); /* ... fix it */ |
|
997 |
EMIT(OOR2, 0); |
|
998 |
AHEAD(THERE()); |
|
999 |
ASTERN(O_CH, THERETHERE()); |
|
1000 |
break; |
|
1001 |
case REP(1, 1): /* trivial case */ |
|
1002 |
/* done */
|
|
1003 |
break; |
|
1004 |
case REP(1, N): /* as x?x{1,n-1} */ |
|
1005 |
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
|
|
1006 |
INSERT(OCH_, start); |
|
1007 |
ASTERN(OOR1, start); |
|
1008 |
AHEAD(start); |
|
1009 |
EMIT(OOR2, 0); /* offset very wrong... */ |
|
1010 |
AHEAD(THERE()); /* ...so fix it */ |
|
1011 |
ASTERN(O_CH, THERETHERE()); |
|
1012 |
copy = dupl(p, start+1, finish+1); |
|
1013 |
assert(copy == finish+4); |
|
1014 |
repeat(p, copy, 1, to-1); |
|
1015 |
break; |
|
1016 |
case REP(1, INF): /* as x+ */ |
|
1017 |
INSERT(OPLUS_, start); |
|
1018 |
ASTERN(O_PLUS, start); |
|
1019 |
break; |
|
1020 |
case REP(N, N): /* as xx{m-1,n-1} */ |
|
1021 |
copy = dupl(p, start, finish); |
|
1022 |
repeat(p, copy, from-1, to-1); |
|
1023 |
break; |
|
1024 |
case REP(N, INF): /* as xx{n-1,INF} */ |
|
1025 |
copy = dupl(p, start, finish); |
|
1026 |
repeat(p, copy, from-1, to); |
|
1027 |
break; |
|
1028 |
default: /* "can't happen" */ |
|
1029 |
SETERROR(REG_ASSERT); /* just in case */ |
|
1030 |
break; |
|
1031 |
}
|
|
1032 |
}
|
|
1033 |
||
1034 |
/*
|
|
1035 |
- seterr - set an error condition
|
|
1036 |
== static int seterr(register struct parse *p, int e);
|
|
1037 |
*/
|
|
1038 |
static int /* useless but makes type checking happy */ |
|
1039 |
seterr(p, e) |
|
1040 |
register struct parse *p; |
|
1041 |
int e; |
|
1042 |
{
|
|
1043 |
if (p->error == 0) /* keep earliest error condition */ |
|
1044 |
p->error = e; |
|
1045 |
p->next = nuls; /* try to bring things to a halt */ |
|
1046 |
p->end = nuls; |
|
1047 |
return(0); /* make the return value well-defined */ |
|
1048 |
}
|
|
1049 |
||
1050 |
/*
|
|
1051 |
- allocset - allocate a set of characters for []
|
|
1052 |
== static cset *allocset(register struct parse *p);
|
|
1053 |
*/
|
|
1054 |
static cset * |
|
1055 |
allocset(p) |
|
1056 |
register struct parse *p; |
|
1057 |
{
|
|
1058 |
register int no = p->g->ncsets++; |
|
1059 |
register size_t nc; |
|
1060 |
register size_t nbytes; |
|
1061 |
register cset *cs; |
|
1062 |
register size_t css = (size_t)p->g->csetsize; |
|
1063 |
register int i; |
|
1064 |
||
1065 |
if (no >= p->ncsalloc) { /* need another column of space */ |
|
1066 |
p->ncsalloc += CHAR_BIT; |
|
1067 |
nc = p->ncsalloc; |
|
1068 |
assert(nc % CHAR_BIT == 0); |
|
1069 |
nbytes = nc / CHAR_BIT * css; |
|
1070 |
if (p->g->sets == NULL) |
|
1071 |
p->g->sets = (cset *)malloc(nc * sizeof(cset)); |
|
1072 |
else
|
|
1073 |
p->g->sets = (cset *)realloc((char *)p->g->sets, |
|
1074 |
nc * sizeof(cset)); |
|
1075 |
if (p->g->setbits == NULL) |
|
1076 |
p->g->setbits = (uch *)malloc(nbytes); |
|
1077 |
else { |
|
1078 |
p->g->setbits = (uch *)realloc((char *)p->g->setbits, |
|
1079 |
nbytes); |
|
1080 |
/* xxx this isn't right if setbits is now NULL */
|
|
1081 |
for (i = 0; i < no; i++) |
|
1082 |
p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); |
|
1083 |
}
|
|
1084 |
if (p->g->sets != NULL && p->g->setbits != NULL) |
|
1085 |
(void) memset((char *)p->g->setbits + (nbytes - css), |
|
1086 |
0, css); |
|
1087 |
else { |
|
1088 |
no = 0; |
|
1089 |
SETERROR(REG_ESPACE); |
|
1090 |
/* caller's responsibility not to do set ops */
|
|
1091 |
}
|
|
1092 |
}
|
|
1093 |
||
1094 |
assert(p->g->sets != NULL); /* xxx */ |
|
1095 |
cs = &p->g->sets[no]; |
|
1096 |
cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); |
|
1097 |
cs->mask = 1 << ((no) % CHAR_BIT); |
|
1098 |
cs->hash = 0; |
|
1099 |
cs->smultis = 0; |
|
1100 |
cs->multis = NULL; |
|
1101 |
||
1102 |
return(cs); |
|
1103 |
}
|
|
1104 |
||
1105 |
/*
|
|
1106 |
- freeset - free a now-unused set
|
|
1107 |
== static void freeset(register struct parse *p, register cset *cs);
|
|
1108 |
*/
|
|
1109 |
static void |
|
1110 |
freeset(p, cs) |
|
1111 |
register struct parse *p; |
|
1112 |
register cset *cs; |
|
1113 |
{
|
|
1114 |
register size_t i; |
|
1115 |
register cset *top = &p->g->sets[p->g->ncsets]; |
|
1116 |
register size_t css = (size_t)p->g->csetsize; |
|
1117 |
||
1118 |
for (i = 0; i < css; i++) |
|
1119 |
CHsub(cs, i); |
|
1120 |
if (cs == top-1) /* recover only the easy case */ |
|
1121 |
p->g->ncsets--; |
|
1122 |
}
|
|
1123 |
||
1124 |
/*
|
|
1125 |
- freezeset - final processing on a set of characters
|
|
1126 |
== static int freezeset(register struct parse *p, register cset *cs);
|
|
1127 |
*
|
|
1128 |
* The main task here is merging identical sets. This is usually a waste
|
|
1129 |
* of time (although the hash code minimizes the overhead), but can win
|
|
1130 |
* big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
|
|
1131 |
* is done using addition rather than xor -- all ASCII [aA] sets xor to
|
|
1132 |
* the same value!
|
|
1133 |
*/
|
|
1134 |
static int /* set number */ |
|
1135 |
freezeset(p, cs) |
|
1136 |
register struct parse *p; |
|
1137 |
register cset *cs; |
|
1138 |
{
|
|
1139 |
register uch h = cs->hash; |
|
1140 |
register size_t i; |
|
1141 |
register cset *top = &p->g->sets[p->g->ncsets]; |
|
1142 |
register cset *cs2; |
|
1143 |
register size_t css = (size_t)p->g->csetsize; |
|
1144 |
||
1145 |
/* look for an earlier one which is the same */
|
|
1146 |
for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) |
|
1147 |
if (cs2->hash == h && cs2 != cs) { |
|
1148 |
/* maybe */
|
|
1149 |
for (i = 0; i < css; i++) |
|
1150 |
if (!!CHIN(cs2, i) != !!CHIN(cs, i)) |
|
1151 |
break; /* no */ |
|
1152 |
if (i == css) |
|
1153 |
break; /* yes */ |
|
1154 |
}
|
|
1155 |
||
1156 |
if (cs2 < top) { /* found one */ |
|
1157 |
freeset(p, cs); |
|
1158 |
cs = cs2; |
|
1159 |
}
|
|
1160 |
||
1161 |
return((int)(cs - p->g->sets)); |
|
1162 |
}
|
|
1163 |
||
1164 |
/*
|
|
1165 |
- firstch - return first character in a set (which must have at least one)
|
|
1166 |
== static int firstch(register struct parse *p, register cset *cs);
|
|
1167 |
*/
|
|
1168 |
static int /* character; there is no "none" value */ |
|
1169 |
firstch(p, cs) |
|
1170 |
register struct parse *p; |
|
1171 |
register cset *cs; |
|
1172 |
{
|
|
1173 |
register size_t i; |
|
1174 |
register size_t css = (size_t)p->g->csetsize; |
|
1175 |
||
1176 |
for (i = 0; i < css; i++) |
|
1177 |
if (CHIN(cs, i)) |
|
1178 |
return((char)i); |
|
1179 |
assert(never); |
|
1180 |
return(0); /* arbitrary */ |
|
1181 |
}
|
|
1182 |
||
1183 |
/*
|
|
1184 |
- nch - number of characters in a set
|
|
1185 |
== static int nch(register struct parse *p, register cset *cs);
|
|
1186 |
*/
|
|
1187 |
static int |
|
1188 |
nch(p, cs) |
|
1189 |
register struct parse *p; |
|
1190 |
register cset *cs; |
|
1191 |
{
|
|
1192 |
register size_t i; |
|
1193 |
register size_t css = (size_t)p->g->csetsize; |
|
1194 |
register int n = 0; |
|
1195 |
||
1196 |
for (i = 0; i < css; i++) |
|
1197 |
if (CHIN(cs, i)) |
|
1198 |
n++; |
|
1199 |
return(n); |
|
1200 |
}
|
|
1201 |
||
1202 |
#ifdef USE_ORIG_REGEX_CODE
|
|
1203 |
/*
|
|
1204 |
- mcadd - add a collating element to a cset
|
|
1205 |
== static void mcadd(register struct parse *p, register cset *cs, \
|
|
1206 |
== register char *cp);
|
|
1207 |
*/
|
|
1208 |
static void |
|
1209 |
mcadd(p, cs, cp) |
|
1210 |
register struct parse *p; |
|
1211 |
register cset *cs; |
|
1212 |
register char *cp; |
|
1213 |
{
|
|
1214 |
register size_t oldend = cs->smultis; |
|
1215 |
||
1216 |
cs->smultis += strlen(cp) + 1; |
|
1217 |
if (cs->multis == NULL) |
|
1218 |
cs->multis = malloc(cs->smultis); |
|
1219 |
else
|
|
1220 |
cs->multis = realloc(cs->multis, cs->smultis); |
|
1221 |
if (cs->multis == NULL) { |
|
1222 |
SETERROR(REG_ESPACE); |
|
1223 |
return; |
|
1224 |
}
|
|
1225 |
||
1226 |
(void) strcpy(cs->multis + oldend - 1, cp); |
|
1227 |
cs->multis[cs->smultis - 1] = '\0'; |
|
1228 |
}
|
|
1229 |
#endif
|
|
1230 |
||
1231 |
#ifdef NOT_USED
|
|
1232 |
/*
|
|
1233 |
- mcsub - subtract a collating element from a cset
|
|
1234 |
== static void mcsub(register cset *cs, register char *cp);
|
|
1235 |
*/
|
|
1236 |
static void |
|
1237 |
mcsub(cs, cp) |
|
1238 |
register cset *cs; |
|
1239 |
register char *cp; |
|
1240 |
{
|
|
1241 |
register char *fp = mcfind(cs, cp); |
|
1242 |
register size_t len = strlen(fp); |
|
1243 |
||
1244 |
assert(fp != NULL); |
|
1245 |
(void) memmove(fp, fp + len + 1, |
|
1246 |
cs->smultis - (fp + len + 1 - cs->multis)); |
|
1247 |
cs->smultis -= len; |
|
1248 |
||
1249 |
if (cs->smultis == 0) { |
|
1250 |
free(cs->multis); |
|
1251 |
cs->multis = NULL; |
|
1252 |
return; |
|
1253 |
}
|
|
1254 |
||
1255 |
cs->multis = realloc(cs->multis, cs->smultis); |
|
1256 |
assert(cs->multis != NULL); |
|
1257 |
}
|
|
1258 |
||
1259 |
/*
|
|
1260 |
- mcin - is a collating element in a cset?
|
|
1261 |
== static int mcin(register cset *cs, register char *cp);
|
|
1262 |
*/
|
|
1263 |
static int |
|
1264 |
mcin(cs, cp) |
|
1265 |
register cset *cs; |
|
1266 |
register char *cp; |
|
1267 |
{
|
|
1268 |
return(mcfind(cs, cp) != NULL); |
|
1269 |
}
|
|
1270 |
||
1271 |
/*
|
|
1272 |
- mcfind - find a collating element in a cset
|
|
1273 |
== static char *mcfind(register cset *cs, register char *cp);
|
|
1274 |
*/
|
|
1275 |
static char * |
|
1276 |
mcfind(cs, cp) |
|
1277 |
register cset *cs; |
|
1278 |
register char *cp; |
|
1279 |
{
|
|
1280 |
register char *p; |
|
1281 |
||
1282 |
if (cs->multis == NULL) |
|
1283 |
return(NULL); |
|
1284 |
for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) |
|
1285 |
if (strcmp(cp, p) == 0) |
|
1286 |
return(p); |
|
1287 |
return(NULL); |
|
1288 |
}
|
|
1289 |
#endif
|
|
1290 |
||
1291 |
/*
|
|
1292 |
- mcinvert - invert the list of collating elements in a cset
|
|
1293 |
== static void mcinvert(register struct parse *p, register cset *cs);
|
|
1294 |
*
|
|
1295 |
* This would have to know the set of possibilities. Implementation
|
|
1296 |
* is deferred.
|
|
1297 |
*/
|
|
1298 |
static void |
|
1299 |
mcinvert(p, cs) |
|
1300 |
register struct parse *p __attribute__((unused)); |
|
1301 |
register cset *cs __attribute__((unused)); |
|
1302 |
{
|
|
1303 |
assert(cs->multis == NULL); /* xxx */ |
|
1304 |
}
|
|
1305 |
||
1306 |
/*
|
|
1307 |
- mccase - add case counterparts of the list of collating elements in a cset
|
|
1308 |
== static void mccase(register struct parse *p, register cset *cs);
|
|
1309 |
*
|
|
1310 |
* This would have to know the set of possibilities. Implementation
|
|
1311 |
* is deferred.
|
|
1312 |
*/
|
|
1313 |
static void |
|
1314 |
mccase(p, cs) |
|
1315 |
register struct parse *p __attribute__((unused)); |
|
1316 |
register cset *cs __attribute__((unused)); |
|
1317 |
{
|
|
1318 |
assert(cs->multis == NULL); /* xxx */ |
|
1319 |
}
|
|
1320 |
||
1321 |
/*
|
|
1322 |
- isinsets - is this character in any sets?
|
|
1323 |
== static int isinsets(register struct re_guts *g, int c);
|
|
1324 |
*/
|
|
1325 |
static int /* predicate */ |
|
1326 |
isinsets(g, c) |
|
1327 |
register struct re_guts *g; |
|
1328 |
int c; |
|
1329 |
{
|
|
1330 |
register uch *col; |
|
1331 |
register int i; |
|
1332 |
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; |
|
1333 |
register unsigned uc = (unsigned char)c; |
|
1334 |
||
1335 |
for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) |
|
1336 |
if (col[uc] != 0) |
|
1337 |
return(1); |
|
1338 |
return(0); |
|
1339 |
}
|
|
1340 |
||
1341 |
/*
|
|
1342 |
- samesets - are these two characters in exactly the same sets?
|
|
1343 |
== static int samesets(register struct re_guts *g, int c1, int c2);
|
|
1344 |
*/
|
|
1345 |
static int /* predicate */ |
|
1346 |
samesets(g, c1, c2) |
|
1347 |
register struct re_guts *g; |
|
1348 |
int c1; |
|
1349 |
int c2; |
|
1350 |
{
|
|
1351 |
register uch *col; |
|
1352 |
register int i; |
|
1353 |
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; |
|
1354 |
register unsigned uc1 = (unsigned char)c1; |
|
1355 |
register unsigned uc2 = (unsigned char)c2; |
|
1356 |
||
1357 |
for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) |
|
1358 |
if (col[uc1] != col[uc2]) |
|
1359 |
return(0); |
|
1360 |
return(1); |
|
1361 |
}
|
|
1362 |
||
1363 |
/*
|
|
1364 |
- categorize - sort out character categories
|
|
1365 |
== static void categorize(struct parse *p, register struct re_guts *g);
|
|
1366 |
*/
|
|
1367 |
static void |
|
1368 |
categorize(p, g) |
|
1369 |
struct parse *p; |
|
1370 |
register struct re_guts *g; |
|
1371 |
{
|
|
1372 |
register cat_t *cats = g->categories; |
|
1373 |
register int c; |
|
1374 |
register int c2; |
|
1375 |
register cat_t cat; |
|
1376 |
||
1377 |
/* avoid making error situations worse */
|
|
1378 |
if (p->error != 0) |
|
1379 |
return; |
|
1380 |
||
1381 |
for (c = CHAR_MIN; c <= CHAR_MAX; c++) |
|
1382 |
if (cats[c] == 0 && isinsets(g, c)) { |
|
1383 |
cat = g->ncategories++; |
|
1384 |
cats[c] = cat; |
|
1385 |
for (c2 = c+1; c2 <= CHAR_MAX; c2++) |
|
1386 |
if (cats[c2] == 0 && samesets(g, c, c2)) |
|
1387 |
cats[c2] = cat; |
|
1388 |
}
|
|
1389 |
}
|
|
1390 |
||
1391 |
/*
|
|
1392 |
- dupl - emit a duplicate of a bunch of sops
|
|
1393 |
== static sopno dupl(register struct parse *p, sopno start, sopno finish);
|
|
1394 |
*/
|
|
1395 |
static sopno /* start of duplicate */ |
|
1396 |
dupl(p, start, finish) |
|
1397 |
register struct parse *p; |
|
1398 |
sopno start; /* from here */ |
|
1399 |
sopno finish; /* to this less one */ |
|
1400 |
{
|
|
1401 |
register sopno ret = HERE(); |
|
1402 |
register sopno len = finish - start; |
|
1403 |
||
1404 |
assert(finish >= start); |
|
1405 |
if (len == 0) |
|
1406 |
return(ret); |
|
1407 |
enlarge(p, p->ssize + len); /* this many unexpected additions */ |
|
1408 |
assert(p->ssize >= p->slen + len); |
|
1409 |
(void) memcpy((char *)(p->strip + p->slen), |
|
1410 |
(char *)(p->strip + start), (size_t)len*sizeof(sop)); |
|
1411 |
p->slen += len; |
|
1412 |
return(ret); |
|
1413 |
}
|
|
1414 |
||
1415 |
/*
|
|
1416 |
- doemit - emit a strip operator
|
|
1417 |
== static void doemit(register struct parse *p, sop op, size_t opnd);
|
|
1418 |
*
|
|
1419 |
* It might seem better to implement this as a macro with a function as
|
|
1420 |
* hard-case backup, but it's just too big and messy unless there are
|
|
1421 |
* some changes to the data structures. Maybe later.
|
|
1422 |
*/
|
|
1423 |
static void |
|
1424 |
doemit(p, op, opnd) |
|
1425 |
register struct parse *p; |
|
1426 |
sop op; |
|
1427 |
size_t opnd; |
|
1428 |
{
|
|
1429 |
/* avoid making error situations worse */
|
|
1430 |
if (p->error != 0) |
|
1431 |
return; |
|
1432 |
||
1433 |
/* deal with oversize operands ("can't happen", more or less) */
|
|
1434 |
assert(opnd < 1<<OPSHIFT); |
|
1435 |
||
1436 |
/* deal with undersized strip */
|
|
1437 |
if (p->slen >= p->ssize) |
|
1438 |
enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ |
|
1439 |
assert(p->slen < p->ssize); |
|
1440 |
||
1441 |
/* finally, it's all reduced to the easy case */
|
|
1442 |
p->strip[p->slen++] = SOP(op, opnd); |
|
1443 |
}
|
|
1444 |
||
1445 |
/*
|
|
1446 |
- doinsert - insert a sop into the strip
|
|
1447 |
== static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
|
|
1448 |
*/
|
|
1449 |
static void |
|
1450 |
doinsert(p, op, opnd, pos) |
|
1451 |
register struct parse *p; |
|
1452 |
sop op; |
|
1453 |
size_t opnd; |
|
1454 |
sopno pos; |
|
1455 |
{
|
|
1456 |
register sopno sn; |
|
1457 |
register sop s; |
|
1458 |
register int i; |
|
1459 |
||
1460 |
/* avoid making error situations worse */
|
|
1461 |
if (p->error != 0) |
|
1462 |
return; |
|
1463 |
||
1464 |
sn = HERE(); |
|
1465 |
EMIT(op, opnd); /* do checks, ensure space */ |
|
1466 |
assert(HERE() == sn+1); |
|
1467 |
s = p->strip[sn]; |
|
1468 |
||
1469 |
/* adjust paren pointers */
|
|
1470 |
assert(pos > 0); |
|
1471 |
for (i = 1; i < NPAREN; i++) { |
|
1472 |
if (p->pbegin[i] >= pos) { |
|
1473 |
p->pbegin[i]++; |
|
1474 |
}
|
|
1475 |
if (p->pend[i] >= pos) { |
|
1476 |
p->pend[i]++; |
|
1477 |
}
|
|
1478 |
}
|
|
1479 |
{
|
|
1480 |
int length=(HERE()-pos-1)*sizeof(sop); |
|
1481 |
bmove_upp((uchar *) &p->strip[pos+1]+length, |
|
1482 |
(uchar *) &p->strip[pos]+length, |
|
1483 |
length); |
|
1484 |
}
|
|
1485 |
#ifdef OLD_CODE
|
|
1486 |
memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], |
|
1487 |
(HERE()-pos-1)*sizeof(sop)); |
|
1488 |
#endif
|
|
1489 |
p->strip[pos] = s; |
|
1490 |
}
|
|
1491 |
||
1492 |
/*
|
|
1493 |
- dofwd - complete a forward reference
|
|
1494 |
== static void dofwd(register struct parse *p, sopno pos, sop value);
|
|
1495 |
*/
|
|
1496 |
static void |
|
1497 |
dofwd(p, pos, value) |
|
1498 |
register struct parse *p; |
|
1499 |
register sopno pos; |
|
1500 |
sop value; |
|
1501 |
{
|
|
1502 |
/* avoid making error situations worse */
|
|
1503 |
if (p->error != 0) |
|
1504 |
return; |
|
1505 |
||
1506 |
assert(value < 1<<OPSHIFT); |
|
1507 |
p->strip[pos] = OP(p->strip[pos]) | value; |
|
1508 |
}
|
|
1509 |
||
1510 |
/*
|
|
1511 |
- enlarge - enlarge the strip
|
|
1512 |
== static void enlarge(register struct parse *p, sopno size);
|
|
1513 |
*/
|
|
1514 |
static void |
|
1515 |
enlarge(p, size) |
|
1516 |
register struct parse *p; |
|
1517 |
register sopno size; |
|
1518 |
{
|
|
1519 |
register sop *sp; |
|
1520 |
||
1521 |
if (p->ssize >= size) |
|
1522 |
return; |
|
1523 |
||
1524 |
sp = (sop *)realloc(p->strip, size*sizeof(sop)); |
|
1525 |
if (sp == NULL) { |
|
1526 |
SETERROR(REG_ESPACE); |
|
1527 |
return; |
|
1528 |
}
|
|
1529 |
p->strip = sp; |
|
1530 |
p->ssize = size; |
|
1531 |
}
|
|
1532 |
||
1533 |
/*
|
|
1534 |
- stripsnug - compact the strip
|
|
1535 |
== static void stripsnug(register struct parse *p, register struct re_guts *g);
|
|
1536 |
*/
|
|
1537 |
static void |
|
1538 |
stripsnug(p, g) |
|
1539 |
register struct parse *p; |
|
1540 |
register struct re_guts *g; |
|
1541 |
{
|
|
1542 |
g->nstates = p->slen; |
|
1543 |
g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); |
|
1544 |
if (g->strip == NULL) { |
|
1545 |
SETERROR(REG_ESPACE); |
|
1546 |
g->strip = p->strip; |
|
1547 |
}
|
|
1548 |
}
|
|
1549 |
||
1550 |
/*
|
|
1551 |
- findmust - fill in must and mlen with longest mandatory literal string
|
|
1552 |
== static void findmust(register struct parse *p, register struct re_guts *g);
|
|
1553 |
*
|
|
1554 |
* This algorithm could do fancy things like analyzing the operands of |
|
|
1555 |
* for common subsequences. Someday. This code is simple and finds most
|
|
1556 |
* of the interesting cases.
|
|
1557 |
*
|
|
1558 |
* Note that must and mlen got initialized during setup.
|
|
1559 |
*/
|
|
1560 |
static void |
|
1561 |
findmust(p, g) |
|
1562 |
struct parse *p; |
|
1563 |
register struct re_guts *g; |
|
1564 |
{
|
|
1565 |
register sop *scan; |
|
1566 |
sop *start; |
|
1567 |
register sop *newstart; |
|
1568 |
register sopno newlen; |
|
1569 |
register sop s; |
|
1570 |
register char *cp; |
|
1571 |
register sopno i; |
|
1572 |
/* avoid making error situations worse */
|
|
1573 |
if (p->error != 0) |
|
1574 |
return; |
|
1575 |
||
1576 |
/* find the longest OCHAR sequence in strip */
|
|
1577 |
newlen = 0; |
|
1578 |
scan = g->strip + 1; |
|
1579 |
do { |
|
1580 |
s = *scan++; |
|
1581 |
switch (OP(s)) { |
|
1582 |
case OCHAR: /* sequence member */ |
|
1583 |
if (newlen == 0) /* new sequence */ |
|
1584 |
newstart = scan - 1; |
|
1585 |
newlen++; |
|
1586 |
break; |
|
1587 |
case OPLUS_: /* things that don't break one */ |
|
1588 |
case OLPAREN: |
|
1589 |
case ORPAREN: |
|
1590 |
break; |
|
1591 |
case OQUEST_: /* things that must be skipped */ |
|
1592 |
case OCH_: |
|
1593 |
scan--; |
|
1594 |
do { |
|
1595 |
scan += OPND(s); |
|
1596 |
s = *scan; |
|
1597 |
/* assert() interferes w debug printouts */
|
|
1598 |
if (OP(s) != O_QUEST && OP(s) != O_CH && |
|
1599 |
OP(s) != OOR2) { |
|
1600 |
g->iflags |= BAD; |
|
1601 |
return; |
|
1602 |
}
|
|
1603 |
} while (OP(s) != O_QUEST && OP(s) != O_CH); |
|
1604 |
/* fallthrough */
|
|
1605 |
default: /* things that break a sequence */ |
|
1606 |
if (newlen > g->mlen) { /* ends one */ |
|
1607 |
start = newstart; |
|
1608 |
g->mlen = newlen; |
|
1609 |
}
|
|
1610 |
newlen = 0; |
|
1611 |
break; |
|
1612 |
}
|
|
1613 |
} while (OP(s) != OEND); |
|
1614 |
||
1615 |
if (g->mlen == 0) /* there isn't one */ |
|
1616 |
return; |
|
1617 |
||
1618 |
/* turn it into a character string */
|
|
1619 |
g->must = malloc((size_t)g->mlen + 1); |
|
1620 |
if (g->must == NULL) { /* argh; just forget it */ |
|
1621 |
g->mlen = 0; |
|
1622 |
return; |
|
1623 |
}
|
|
1624 |
cp = g->must; |
|
1625 |
scan = start; |
|
1626 |
for (i = g->mlen; i > 0; i--) { |
|
1627 |
while (OP(s = *scan++) != OCHAR) |
|
1628 |
continue; |
|
1629 |
assert(cp < g->must + g->mlen); |
|
1630 |
*cp++ = (char)OPND(s); |
|
1631 |
}
|
|
1632 |
assert(cp == g->must + g->mlen); |
|
1633 |
*cp++ = '\0'; /* just on general principles */ |
|
1634 |
}
|
|
1635 |
||
1636 |
/*
|
|
1637 |
- pluscount - count + nesting
|
|
1638 |
== static sopno pluscount(register struct parse *p, register struct re_guts *g);
|
|
1639 |
*/
|
|
1640 |
static sopno /* nesting depth */ |
|
1641 |
pluscount(p, g) |
|
1642 |
struct parse *p; |
|
1643 |
register struct re_guts *g; |
|
1644 |
{
|
|
1645 |
register sop *scan; |
|
1646 |
register sop s; |
|
1647 |
register sopno plusnest = 0; |
|
1648 |
register sopno maxnest = 0; |
|
1649 |
||
1650 |
if (p->error != 0) |
|
1651 |
return(0); /* there may not be an OEND */ |
|
1652 |
||
1653 |
scan = g->strip + 1; |
|
1654 |
do { |
|
1655 |
s = *scan++; |
|
1656 |
switch (OP(s)) { |
|
1657 |
case OPLUS_: |
|
1658 |
plusnest++; |
|
1659 |
break; |
|
1660 |
case O_PLUS: |
|
1661 |
if (plusnest > maxnest) |
|
1662 |
maxnest = plusnest; |
|
1663 |
plusnest--; |
|
1664 |
break; |
|
1665 |
}
|
|
1666 |
} while (OP(s) != OEND); |
|
1667 |
if (plusnest != 0) |
|
1668 |
g->iflags |= BAD; |
|
1669 |
return(maxnest); |
|
1670 |
}
|