1
by brian
clean slate |
1 |
#include <my_global.h> |
2 |
#include <m_string.h> |
|
3 |
#include <sys/types.h> |
|
4 |
#include <assert.h> |
|
5 |
||
6 |
#include "my_regex.h" |
|
7 |
#include "main.ih" |
|
8 |
||
9 |
char *progname; |
|
10 |
int debug = 0; |
|
11 |
int line = 0; |
|
12 |
int status = 0; |
|
13 |
||
14 |
int copts = REG_EXTENDED; |
|
15 |
int eopts = 0; |
|
16 |
regoff_t startoff = 0; |
|
17 |
regoff_t endoff = 0; |
|
18 |
||
19 |
||
20 |
extern int split(); |
|
21 |
extern void regprint(); |
|
22 |
||
23 |
/*
|
|
24 |
- main - do the simple case, hand off to regress() for regression
|
|
25 |
*/
|
|
26 |
int main(argc, argv) |
|
27 |
int argc; |
|
28 |
char *argv[]; |
|
29 |
{
|
|
30 |
my_regex_t re; |
|
31 |
# define NS 10
|
|
32 |
my_regmatch_t subs[NS]; |
|
33 |
char erbuf[100]; |
|
34 |
int err; |
|
35 |
size_t len; |
|
36 |
int c; |
|
37 |
int errflg = 0; |
|
38 |
register int i; |
|
39 |
extern int optind; |
|
40 |
extern char *optarg; |
|
41 |
||
42 |
progname = argv[0]; |
|
43 |
||
44 |
while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF) |
|
45 |
switch (c) { |
|
46 |
case 'c': /* compile options */ |
|
47 |
copts = options('c', optarg); |
|
48 |
break; |
|
49 |
case 'e': /* execute options */ |
|
50 |
eopts = options('e', optarg); |
|
51 |
break; |
|
52 |
case 'S': /* start offset */ |
|
53 |
startoff = (regoff_t)atoi(optarg); |
|
54 |
break; |
|
55 |
case 'E': /* end offset */ |
|
56 |
endoff = (regoff_t)atoi(optarg); |
|
57 |
break; |
|
58 |
case 'x': /* Debugging. */ |
|
59 |
debug++; |
|
60 |
break; |
|
61 |
case '?': |
|
62 |
default: |
|
63 |
errflg++; |
|
64 |
break; |
|
65 |
}
|
|
66 |
if (errflg) { |
|
67 |
fprintf(stderr, "usage: %s ", progname); |
|
68 |
fprintf(stderr, "[-c copt][-C][-d] [re]\n"); |
|
69 |
exit(2); |
|
70 |
}
|
|
71 |
||
72 |
if (optind >= argc) { |
|
73 |
regress(stdin); |
|
74 |
exit(status); |
|
75 |
}
|
|
76 |
||
77 |
err = my_regcomp(&re, argv[optind++], copts, &my_charset_latin1); |
|
78 |
if (err) { |
|
79 |
len = my_regerror(err, &re, erbuf, sizeof(erbuf)); |
|
80 |
fprintf(stderr, "error %s, %d/%d `%s'\n", |
|
81 |
eprint(err), (int) len, (int) sizeof(erbuf), erbuf); |
|
82 |
exit(status); |
|
83 |
}
|
|
84 |
regprint(&re, stdout); |
|
85 |
||
86 |
if (optind >= argc) { |
|
87 |
my_regfree(&re); |
|
88 |
exit(status); |
|
89 |
}
|
|
90 |
||
91 |
if (eopts®_STARTEND) { |
|
92 |
subs[0].rm_so = startoff; |
|
93 |
subs[0].rm_eo = strlen(argv[optind]) - endoff; |
|
94 |
}
|
|
95 |
err = my_regexec(&re, argv[optind], (size_t)NS, subs, eopts); |
|
96 |
if (err) { |
|
97 |
len = my_regerror(err, &re, erbuf, sizeof(erbuf)); |
|
98 |
fprintf(stderr, "error %s, %d/%d `%s'\n", |
|
99 |
eprint(err), (int) len, (int) sizeof(erbuf), erbuf); |
|
100 |
exit(status); |
|
101 |
}
|
|
102 |
if (!(copts®_NOSUB)) { |
|
103 |
len = (int)(subs[0].rm_eo - subs[0].rm_so); |
|
104 |
if (subs[0].rm_so != -1) { |
|
105 |
if (len != 0) |
|
106 |
printf("match `%.*s'\n", (int)len, |
|
107 |
argv[optind] + subs[0].rm_so); |
|
108 |
else
|
|
109 |
printf("match `'@%.1s\n", |
|
110 |
argv[optind] + subs[0].rm_so); |
|
111 |
}
|
|
112 |
for (i = 1; i < NS; i++) |
|
113 |
if (subs[i].rm_so != -1) |
|
114 |
printf("(%d) `%.*s'\n", i, |
|
115 |
(int)(subs[i].rm_eo - subs[i].rm_so), |
|
116 |
argv[optind] + subs[i].rm_so); |
|
117 |
}
|
|
118 |
exit(status); |
|
119 |
}
|
|
120 |
||
121 |
/*
|
|
122 |
- regress - main loop of regression test
|
|
123 |
== void regress(FILE *in);
|
|
124 |
*/
|
|
125 |
void
|
|
126 |
regress(in) |
|
127 |
FILE *in; |
|
128 |
{
|
|
129 |
char inbuf[1000]; |
|
130 |
# define MAXF 10
|
|
131 |
char *f[MAXF]; |
|
132 |
int nf; |
|
133 |
int i; |
|
134 |
char erbuf[100]; |
|
135 |
size_t ne; |
|
136 |
const char *badpat = "invalid regular expression"; |
|
137 |
# define SHORT 10
|
|
138 |
const char *bpname = "REG_BADPAT"; |
|
139 |
my_regex_t re; |
|
140 |
||
141 |
while (fgets(inbuf, sizeof(inbuf), in) != NULL) { |
|
142 |
line++; |
|
143 |
if (inbuf[0] == '#' || inbuf[0] == '\n') |
|
144 |
continue; /* NOTE CONTINUE */ |
|
145 |
inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ |
|
146 |
if (debug) |
|
147 |
fprintf(stdout, "%d:\n", line); |
|
148 |
nf = split(inbuf, f, MAXF, "\t\t"); |
|
149 |
if (nf < 3) { |
|
150 |
fprintf(stderr, "bad input, line %d\n", line); |
|
151 |
exit(1); |
|
152 |
}
|
|
153 |
for (i = 0; i < nf; i++) |
|
154 |
if (strcmp(f[i], "\"\"") == 0) |
|
155 |
f[i] = (char*) ""; |
|
156 |
if (nf <= 3) |
|
157 |
f[3] = NULL; |
|
158 |
if (nf <= 4) |
|
159 |
f[4] = NULL; |
|
160 |
rx_try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); |
|
161 |
if (opt('&', f[1])) /* try with either type of RE */ |
|
162 |
rx_try(f[0], f[1], f[2], f[3], f[4], |
|
163 |
options('c', f[1]) &~ REG_EXTENDED); |
|
164 |
}
|
|
165 |
||
166 |
ne = my_regerror(REG_BADPAT, (my_regex_t *)NULL, erbuf, sizeof(erbuf)); |
|
167 |
if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { |
|
168 |
fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", |
|
169 |
erbuf, badpat); |
|
170 |
status = 1; |
|
171 |
}
|
|
172 |
ne = my_regerror(REG_BADPAT, (my_regex_t *)NULL, erbuf, (size_t)SHORT); |
|
173 |
if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || |
|
174 |
ne != strlen(badpat)+1) { |
|
175 |
fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", |
|
176 |
erbuf, SHORT-1, badpat); |
|
177 |
status = 1; |
|
178 |
}
|
|
179 |
ne = my_regerror(REG_ITOA|REG_BADPAT, (my_regex_t *)NULL, erbuf, sizeof(erbuf)); |
|
180 |
if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { |
|
181 |
fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", |
|
182 |
erbuf, bpname); |
|
183 |
status = 1; |
|
184 |
}
|
|
185 |
re.re_endp = bpname; |
|
186 |
ne = my_regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); |
|
187 |
if (atoi(erbuf) != (int)REG_BADPAT) { |
|
188 |
fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", |
|
189 |
erbuf, (long)REG_BADPAT); |
|
190 |
status = 1; |
|
191 |
} else if (ne != strlen(erbuf)+1) { |
|
192 |
fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", |
|
193 |
erbuf, (long)REG_BADPAT); |
|
194 |
status = 1; |
|
195 |
}
|
|
196 |
}
|
|
197 |
||
198 |
/*
|
|
199 |
- rx_try - try it, and report on problems
|
|
200 |
== void rx_try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
|
|
201 |
*/
|
|
202 |
void
|
|
203 |
rx_try(f0, f1, f2, f3, f4, opts) |
|
204 |
char *f0; |
|
205 |
char *f1; |
|
206 |
char *f2; |
|
207 |
char *f3; |
|
208 |
char *f4; |
|
209 |
int opts; /* may not match f1 */ |
|
210 |
{
|
|
211 |
my_regex_t re; |
|
212 |
# define NSUBS 10
|
|
213 |
my_regmatch_t subs[NSUBS]; |
|
214 |
# define NSHOULD 15
|
|
215 |
char *should[NSHOULD]; |
|
216 |
int nshould; |
|
217 |
char erbuf[100]; |
|
218 |
int err; |
|
219 |
int len; |
|
220 |
const char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; |
|
221 |
register int i; |
|
222 |
char *grump; |
|
223 |
char f0copy[1000]; |
|
224 |
char f2copy[1000]; |
|
225 |
||
226 |
strcpy(f0copy, f0); |
|
227 |
re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; |
|
228 |
fixstr(f0copy); |
|
229 |
err = my_regcomp(&re, f0copy, opts, &my_charset_latin1); |
|
230 |
if (err != 0 && (!opt('C', f1) || err != efind(f2))) { |
|
231 |
/* unexpected error or wrong error */
|
|
232 |
len = my_regerror(err, &re, erbuf, sizeof(erbuf)); |
|
233 |
fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", |
|
234 |
line, type, eprint(err), len, |
|
235 |
(int) sizeof(erbuf), erbuf); |
|
236 |
status = 1; |
|
237 |
} else if (err == 0 && opt('C', f1)) { |
|
238 |
/* unexpected success */
|
|
239 |
fprintf(stderr, "%d: %s should have given REG_%s\n", |
|
240 |
line, type, f2); |
|
241 |
status = 1; |
|
242 |
err = 1; /* so we won't try regexec */ |
|
243 |
}
|
|
244 |
||
245 |
if (err != 0) { |
|
246 |
my_regfree(&re); |
|
247 |
return; |
|
248 |
}
|
|
249 |
||
250 |
strcpy(f2copy, f2); |
|
251 |
fixstr(f2copy); |
|
252 |
||
253 |
if (options('e', f1)®_STARTEND) { |
|
254 |
if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) |
|
255 |
fprintf(stderr, "%d: bad STARTEND syntax\n", line); |
|
256 |
subs[0].rm_so = strchr(f2, '(') - f2 + 1; |
|
257 |
subs[0].rm_eo = strchr(f2, ')') - f2; |
|
258 |
}
|
|
259 |
err = my_regexec(&re, f2copy, NSUBS, subs, options('e', f1)); |
|
260 |
||
261 |
if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { |
|
262 |
/* unexpected error or wrong error */
|
|
263 |
len = my_regerror(err, &re, erbuf, sizeof(erbuf)); |
|
264 |
fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", |
|
265 |
line, type, eprint(err), len, |
|
266 |
(int) sizeof(erbuf), erbuf); |
|
267 |
status = 1; |
|
268 |
} else if (err != 0) { |
|
269 |
/* nothing more to check */
|
|
270 |
} else if (f3 == NULL) { |
|
271 |
/* unexpected success */
|
|
272 |
fprintf(stderr, "%d: %s exec should have failed\n", |
|
273 |
line, type); |
|
274 |
status = 1; |
|
275 |
err = 1; /* just on principle */ |
|
276 |
} else if (opts®_NOSUB) { |
|
277 |
/* nothing more to check */
|
|
278 |
} else if ((grump = check(f2, subs[0], f3)) != NULL) { |
|
279 |
fprintf(stderr, "%d: %s %s\n", line, type, grump); |
|
280 |
status = 1; |
|
281 |
err = 1; |
|
282 |
}
|
|
283 |
||
284 |
if (err != 0 || f4 == NULL) { |
|
285 |
my_regfree(&re); |
|
286 |
return; |
|
287 |
}
|
|
288 |
||
289 |
for (i = 1; i < NSHOULD; i++) |
|
290 |
should[i] = NULL; |
|
291 |
nshould = split(f4, should+1, NSHOULD-1, ","); |
|
292 |
if (nshould == 0) { |
|
293 |
nshould = 1; |
|
294 |
should[1] = (char*) ""; |
|
295 |
}
|
|
296 |
for (i = 1; i < NSUBS; i++) { |
|
297 |
grump = check(f2, subs[i], should[i]); |
|
298 |
if (grump != NULL) { |
|
299 |
fprintf(stderr, "%d: %s $%d %s\n", line, |
|
300 |
type, i, grump); |
|
301 |
status = 1; |
|
302 |
err = 1; |
|
303 |
}
|
|
304 |
}
|
|
305 |
||
306 |
my_regfree(&re); |
|
307 |
}
|
|
308 |
||
309 |
/*
|
|
310 |
- options - pick options out of a regression-test string
|
|
311 |
== int options(int type, char *s);
|
|
312 |
*/
|
|
313 |
int
|
|
314 |
options(type, s) |
|
315 |
int type; /* 'c' compile, 'e' exec */ |
|
316 |
char *s; |
|
317 |
{
|
|
318 |
register char *p; |
|
319 |
register int o = (type == 'c') ? copts : eopts; |
|
320 |
register const char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; |
|
321 |
||
322 |
for (p = s; *p != '\0'; p++) |
|
323 |
if (strchr(legal, *p) != NULL) |
|
324 |
switch (*p) { |
|
325 |
case 'b': |
|
326 |
o &= ~REG_EXTENDED; |
|
327 |
break; |
|
328 |
case 'i': |
|
329 |
o |= REG_ICASE; |
|
330 |
break; |
|
331 |
case 's': |
|
332 |
o |= REG_NOSUB; |
|
333 |
break; |
|
334 |
case 'n': |
|
335 |
o |= REG_NEWLINE; |
|
336 |
break; |
|
337 |
case 'm': |
|
338 |
o &= ~REG_EXTENDED; |
|
339 |
o |= REG_NOSPEC; |
|
340 |
break; |
|
341 |
case 'p': |
|
342 |
o |= REG_PEND; |
|
343 |
break; |
|
344 |
case '^': |
|
345 |
o |= REG_NOTBOL; |
|
346 |
break; |
|
347 |
case '$': |
|
348 |
o |= REG_NOTEOL; |
|
349 |
break; |
|
350 |
case '#': |
|
351 |
o |= REG_STARTEND; |
|
352 |
break; |
|
353 |
case 't': /* trace */ |
|
354 |
o |= REG_TRACE; |
|
355 |
break; |
|
356 |
case 'l': /* force long representation */ |
|
357 |
o |= REG_LARGE; |
|
358 |
break; |
|
359 |
case 'r': /* force backref use */ |
|
360 |
o |= REG_BACKR; |
|
361 |
break; |
|
362 |
}
|
|
363 |
return(o); |
|
364 |
}
|
|
365 |
||
366 |
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if character c occurs in the option string s, 0 otherwise.
 * s must not be NULL.
 */
int				/* predicate */
opt(int c, char *s)
{
	return(strchr(s, c) != NULL);
}
|
|
377 |
||
378 |
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' becomes tab,
 * 'S' becomes space and 'Z' becomes NUL.  A NULL pointer is ignored.
 *
 * The scan stops at the original terminator, not at a NUL produced from a
 * 'Z': the loop test looks at the next character only after advancing, so
 * text following a 'Z' is still translated.
 */
void
fixstr(char *p)
{
	if (p == NULL)
		return;

	for (; *p != '\0'; p++) {
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
	}
}
|
|
399 |
||
400 |
/*
|
|
401 |
- check - check a substring match
|
|
402 |
== char *check(char *str, regmatch_t sub, char *should);
|
|
403 |
*/
|
|
404 |
char * /* NULL or complaint */ |
|
405 |
check(str, sub, should) |
|
406 |
char *str; |
|
407 |
my_regmatch_t sub; |
|
408 |
char *should; |
|
409 |
{
|
|
410 |
register int len; |
|
411 |
register int shlen; |
|
412 |
register char *p; |
|
413 |
static char grump[500]; |
|
414 |
register char *at = NULL; |
|
415 |
||
416 |
if (should != NULL && strcmp(should, "-") == 0) |
|
417 |
should = NULL; |
|
418 |
if (should != NULL && should[0] == '@') { |
|
419 |
at = should + 1; |
|
420 |
should = (char*) ""; |
|
421 |
}
|
|
422 |
||
423 |
/* check rm_so and rm_eo for consistency */
|
|
424 |
if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || |
|
425 |
(sub.rm_so != -1 && sub.rm_eo == -1) || |
|
426 |
(sub.rm_so != -1 && sub.rm_so < 0) || |
|
427 |
(sub.rm_eo != -1 && sub.rm_eo < 0) ) { |
|
428 |
sprintf(grump, "start %ld end %ld", (long)sub.rm_so, |
|
429 |
(long)sub.rm_eo); |
|
430 |
return(grump); |
|
431 |
}
|
|
432 |
||
433 |
/* check for no match */
|
|
434 |
if (sub.rm_so == -1 && should == NULL) |
|
435 |
return(NULL); |
|
436 |
if (sub.rm_so == -1) |
|
437 |
return((char*) "did not match"); |
|
438 |
||
439 |
/* check for in range */
|
|
440 |
if ((int) sub.rm_eo > (int) strlen(str)) { |
|
441 |
sprintf(grump, "start %ld end %ld, past end of string", |
|
442 |
(long)sub.rm_so, (long)sub.rm_eo); |
|
443 |
return(grump); |
|
444 |
}
|
|
445 |
||
446 |
len = (int)(sub.rm_eo - sub.rm_so); |
|
447 |
shlen = (int)strlen(should); |
|
448 |
p = str + sub.rm_so; |
|
449 |
||
450 |
/* check for not supposed to match */
|
|
451 |
if (should == NULL) { |
|
452 |
sprintf(grump, "matched `%.*s'", len, p); |
|
453 |
return(grump); |
|
454 |
}
|
|
455 |
||
456 |
/* check for wrong match */
|
|
457 |
if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { |
|
458 |
sprintf(grump, "matched `%.*s' instead", len, p); |
|
459 |
return(grump); |
|
460 |
}
|
|
461 |
if (shlen > 0) |
|
462 |
return(NULL); |
|
463 |
||
464 |
/* check null match in right place */
|
|
465 |
if (at == NULL) |
|
466 |
return(NULL); |
|
467 |
shlen = strlen(at); |
|
468 |
if (shlen == 0) |
|
469 |
shlen = 1; /* force check for end-of-string */ |
|
470 |
if (strncmp(p, at, shlen) != 0) { |
|
471 |
sprintf(grump, "matched null at `%.20s'", p); |
|
472 |
return(grump); |
|
473 |
}
|
|
474 |
return(NULL); |
|
475 |
}
|
|
476 |
||
477 |
/*
|
|
478 |
- eprint - convert error number to name
|
|
479 |
== static char *eprint(int err);
|
|
480 |
*/
|
|
481 |
static char * |
|
482 |
eprint(err) |
|
483 |
int err; |
|
484 |
{
|
|
485 |
static char epbuf[100]; |
|
486 |
size_t len; |
|
487 |
||
488 |
len = my_regerror(REG_ITOA|err, (my_regex_t *)NULL, epbuf, sizeof(epbuf)); |
|
489 |
assert(len <= sizeof(epbuf)); |
|
490 |
return(epbuf); |
|
491 |
}
|
|
492 |
||
493 |
/*
|
|
494 |
- efind - convert error name to number
|
|
495 |
== static int efind(char *name);
|
|
496 |
*/
|
|
497 |
static int |
|
498 |
efind(name) |
|
499 |
char *name; |
|
500 |
{
|
|
501 |
static char efbuf[100]; |
|
502 |
my_regex_t re; |
|
503 |
||
504 |
sprintf(efbuf, "REG_%s", name); |
|
505 |
assert(strlen(efbuf) < sizeof(efbuf)); |
|
506 |
re.re_endp = efbuf; |
|
507 |
(void) my_regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); |
|
508 |
return(atoi(efbuf)); |
|
509 |
}
|