~drizzle-trunk/drizzle/development

0.67.305 by Bernt M. Johnsen
Copyright headres and license added
1
# Copyright (C) 2008-2009 Sun Microsystems, Inc. All rights reserved.
2
# Use is subject to license terms.
3
#
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; version 2 of the License.
7
#
8
# This program is distributed in the hope that it will be useful, but
9
# WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
# General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
16
# USA
17
0.67.1 by Philip Stoev
initial import from internal tree
18
package GenTest::Grammar;
19
20
require Exporter;
21
@ISA = qw(GenTest);
0.67.9 by Philip Stoev
merge from internal tree
22
@EXPORT = qw(
23
	GRAMMAR_FLAG_COMPACT_RULES
24
);
0.67.1 by Philip Stoev
initial import from internal tree
25
26
use strict;
27
28
use GenTest;
29
use GenTest::Constants;
30
use GenTest::Grammar::Rule;
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
31
use GenTest::Random;
32
33
use Data::Dumper;
0.67.1 by Philip Stoev
initial import from internal tree
34
35
use constant GRAMMAR_RULES	=> 0;
36
use constant GRAMMAR_FILE	=> 1;
37
use constant GRAMMAR_STRING	=> 2;
0.67.9 by Philip Stoev
merge from internal tree
38
use constant GRAMMAR_FLAGS	=> 3;
39
40
use constant GRAMMAR_FLAG_COMPACT_RULES	=> 1;
0.67.1 by Philip Stoev
initial import from internal tree
41
42
1;
43
44
sub new {
45
	my $class = shift;
46
47
48
	my $grammar = $class->SUPER::new({
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
49
		'grammar_file'			=> GRAMMAR_FILE,
50
		'grammar_string'		=> GRAMMAR_STRING,
51
		'grammar_flags'		=> GRAMMAR_FLAGS,
52
		'grammar_rules'		=> GRAMMAR_RULES
0.67.1 by Philip Stoev
initial import from internal tree
53
	}, @_);
54
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
55
56
    if (defined $grammar->rules()) {
57
        $grammar->[GRAMMAR_STRING] = $grammar->toString();
58
    } else {
59
        $grammar->[GRAMMAR_RULES] = {};
60
        
61
        if (defined $grammar->file()) {
62
            my $parse_result = $grammar->parseFromFile($grammar->file());
63
            return undef if $parse_result > STATUS_OK;
64
        }
65
        
66
        if (defined $grammar->string()) {
67
            my $parse_result = $grammar->parseFromString($grammar->string());
68
            return undef if $parse_result > STATUS_OK;
69
        }
70
    }
0.67.1 by Philip Stoev
initial import from internal tree
71
72
	return $grammar;
73
}
74
75
sub file {
76
	return $_[0]->[GRAMMAR_FILE];
77
}
78
79
sub string {
80
	return $_[0]->[GRAMMAR_STRING];
81
}
82
83
84
sub toString {
85
	my $grammar = shift;
86
	my $rules = $grammar->rules();
87
	return join("\n\n", map { $grammar->rule($_)->toString() } sort keys %$rules);
88
}
89
90
91
sub parseFromFile {
92
	my ($grammar, $grammar_file) = @_;
93
94
	open (GF, $grammar_file) or die "Unable to open() grammar $grammar_file: $!";
95
	read (GF, my $grammar_string, -s $grammar_file) or die "Unable to read() $grammar_file: $!";
96
0.67.9 by Philip Stoev
merge from internal tree
97
	$grammar->[GRAMMAR_STRING] = $grammar_string;
98
0.67.1 by Philip Stoev
initial import from internal tree
99
	return $grammar->parseFromString($grammar_string);
100
}
101
102
sub parseFromString {
103
	my ($grammar, $grammar_string) = @_;
104
105
	#
106
	# provide an #include directive 
107
	#
108
109
	while ($grammar_string =~ s{#include [<"](.*?)[>"]$}{
110
		{
111
			my $include_string;
112
			my $include_file = $1;
113
		        open (IF, $1) or die "Unable to open include file $include_file: $!";
114
		        read (IF, my $include_string, -s $include_file) or die "Unable to open $include_file: $!";
115
			$include_string;
116
	}}mie) {};
117
118
	# Strip comments. Note that this is not Perl-code safe, since perl fragments 
119
	# can contain both comments with # and the $# expression. A proper lexer will fix this
120
	
121
	$grammar_string =~ s{#.*$}{}iomg;
122
123
	# Join lines ending in \
124
125
	$grammar_string =~ s{\\$}{ }iomg;
126
127
	# Strip end-line whitespace
128
129
	$grammar_string =~ s{\s+$}{}iomg;
130
131
	# Add terminating \n to ease parsing
132
133
	$grammar_string = $grammar_string."\n";
134
135
	my @rule_strings = split (";\s*[\r\n]+", $grammar_string);
136
137
	my %rules;
138
139
	foreach my $rule_string (@rule_strings) {
0.67.9 by Philip Stoev
merge from internal tree
140
		my ($rule_name, $components_string) = $rule_string =~ m{^(.*?)\s*:(.*)$}sio;
141
0.67.1 by Philip Stoev
initial import from internal tree
142
		$rule_name =~ s{[\r\n]}{}gsio;
143
		$rule_name =~ s{^\s*}{}gsio;
144
145
		next if $rule_name eq '';
146
0.67.9 by Philip Stoev
merge from internal tree
147
		say("Warning: Rule $rule_name is defined twice.") if exists $rules{$rule_name};
0.67.1 by Philip Stoev
initial import from internal tree
148
149
		my @component_strings = split (m{\|}, $components_string);
150
		my @components;
0.67.9 by Philip Stoev
merge from internal tree
151
		my %components;
152
153
		foreach my $component_string (@component_strings) {
0.67.1 by Philip Stoev
initial import from internal tree
154
			# Remove leading whitespace
0.67.9 by Philip Stoev
merge from internal tree
155
			$component_string =~ s{^\s+}{}sgio;
156
			$component_string =~ s{\s+$}{}sgio;
0.67.1 by Philip Stoev
initial import from internal tree
157
		
158
			# Rempove repeating whitespaces
0.67.9 by Philip Stoev
merge from internal tree
159
			$component_string =~ s{\s+}{ }sgio;
0.67.1 by Philip Stoev
initial import from internal tree
160
161
			# Split this so that each identifier is separated from all syntax elements
162
			# The identifier can start with a lowercase letter or an underscore , plus quotes
163
0.67.9 by Philip Stoev
merge from internal tree
164
			$component_string =~ s{([_a-z0-9'"`\{\}\$\[\]]+)}{|$1|}sgo;
0.67.1 by Philip Stoev
initial import from internal tree
165
166
			# Revert overzealous splitting that splits things like _varchar(32) into several tokens
167
		
0.67.9 by Philip Stoev
merge from internal tree
168
			$component_string =~ s{([a-z0-9_]+)\|\(\|(\d+)\|\)}{$1($2)|}sgo;
0.67.1 by Philip Stoev
initial import from internal tree
169
170
			# Remove leading and trailing pipes
0.67.9 by Philip Stoev
merge from internal tree
171
			$component_string =~ s{^\|}{}sgio;
172
			$component_string =~ s{\|$}{}sgio;
173
174
			if (
175
				(exists $components{$component_string}) &&
176
				($grammar->[GRAMMAR_FLAGS] & GRAMMAR_FLAG_COMPACT_RULES)
177
			) {
178
				next;
179
			} else {
180
				$components{$component_string}++;
181
			}
182
183
			my @component_parts = split (m{\|}, $component_string);
0.67.1 by Philip Stoev
initial import from internal tree
184
185
			#
186
			# If this grammar rule contains Perl code, assemble it between the various
187
			# component parts it was split into. This "reconstructive" step is definitely bad design
188
			# The way to do it properly would be to tokenize the grammar using a full-blown lexer
189
			# which should hopefully come up in a future version.
190
			#
191
192
			my $nesting_level = 0;
193
			my $pos = 0;
194
			my $code_start;
195
196
			while (1) {
197
				if ($component_parts[$pos] =~ m{\{}so) {
198
					$code_start = $pos if $nesting_level == 0;	# Code segment starts here
199
					my $bracket_count = ($component_parts[$pos] =~ tr/{//);
200
					$nesting_level = $nesting_level + $bracket_count;
201
				}
202
				
203
				if ($component_parts[$pos] =~ m{\}}so) {
204
					my $bracket_count = ($component_parts[$pos] =~ tr/}//);
205
					$nesting_level = $nesting_level - $bracket_count;
206
					if ($nesting_level == 0) {
207
						# Resemble the entire Perl code segment into a single string
208
						splice(@component_parts, $code_start, ($pos - $code_start + 1) , join ('', @component_parts[$code_start..$pos]));
209
						$pos = $code_start + 1;
210
						$code_start = undef;
211
					}
212
				}
213
				last if $pos > $#component_parts;
214
				$pos++;
215
			}
216
217
			push @components, \@component_parts;
218
		}
219
220
		my $rule = GenTest::Grammar::Rule->new(
221
			name => $rule_name,
222
			components => \@components
223
		);
224
		$rules{$rule_name} = $rule;
225
	}
226
227
	$grammar->[GRAMMAR_RULES] = \%rules;
228
	return STATUS_OK;
229
}
230
231
sub rule {
232
	return $_[0]->[GRAMMAR_RULES]->{$_[1]};
233
}
234
235
sub rules {
236
	return $_[0]->[GRAMMAR_RULES];
237
}
238
239
sub deleteRule {
240
	delete $_[0]->[GRAMMAR_RULES]->{$_[1]};
241
}
242
0.67.9 by Philip Stoev
merge from internal tree
243
#
244
# Check if the grammar is tagged with query properties such as RESULTSET_ or ERROR_1234
245
#
246
247
sub hasProperties {
0.67.127 by Philip Stoev
new QUERY_IS_REPLICATION_SAFE property
248
	if ($_[0]->[GRAMMAR_STRING] =~ m{RESULTSET_|ERROR_|QUERY_}so) {
0.67.9 by Philip Stoev
merge from internal tree
249
		return 1;
250
	} else {
251
		return 0;
252
	}
253
}
254
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
255
##
256
## Make a new grammar using the patch_grammar to replace old rules and
257
## add new rules.
258
##
259
sub patch {
260
    my ($self, $patch_grammar) = @_;
261
262
    my $patch_rules = $patch_grammar->rules();
263
264
    my $rules = $self->rules();
265
0.67.89 by Bernt M. Johnsen
Some code simplifications (and hopefully speedups)
266
    foreach my $ruleName (keys %$patch_rules) {
267
        $rules->{$ruleName} = $patch_rules->{$ruleName};
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
268
    }
269
0.67.89 by Bernt M. Johnsen
Some code simplifications (and hopefully speedups)
270
    my $new_grammar = GenTest::Grammar->new(grammar_rules => $rules);
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
271
    return $new_grammar;
272
}
273
274
275
sub firstMatchingRule {
276
    my ($self, @ids) = @_;
277
    foreach my $x (@ids) {
278
        return $self->rule($x) if defined $self->rule($x);
279
    }
0.67.26 by Bernt M. Johnsen
Apply new masking (still old semantics, 1 level down)
280
    return undef;
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
281
}
282
283
##
284
## The "body" of topGrammar
285
##
286
287
sub topGrammarX {
288
    my ($self, $level, $max, @rules) = @_;
289
    if ($max > 0) {
0.67.89 by Bernt M. Johnsen
Some code simplifications (and hopefully speedups)
290
        my $result={};
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
291
        foreach my $rule (@rules) {
0.67.89 by Bernt M. Johnsen
Some code simplifications (and hopefully speedups)
292
            foreach my $c (@{$rule->components()}) {
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
293
                my @subrules = ();
294
                foreach my $cp (@$c) {
295
                    push @subrules,$self->rule($cp) if defined $self->rule($cp);
296
                }
297
                my $componentrules = 
298
                    $self->topGrammarX($level + 1, $max -1,@subrules);
299
                if (defined  $componentrules) {
0.67.89 by Bernt M. Johnsen
Some code simplifications (and hopefully speedups)
300
                    foreach my $sr (keys %$componentrules) {
301
                        $result->{$sr} = $componentrules->{$sr};
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
302
                    }
303
                }
304
            }
0.67.89 by Bernt M. Johnsen
Some code simplifications (and hopefully speedups)
305
            $result->{$rule->name()} = $rule;
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
306
        }
0.67.89 by Bernt M. Johnsen
Some code simplifications (and hopefully speedups)
307
        return $result;
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
308
    } else {
309
        return undef;
310
    }
311
}
312
313
314
##
315
## Produce a new grammar which is the toplevel $level rules of this
316
## grammar
317
##
318
319
sub topGrammar {
320
    my ($self, $levels, @startrules) = @_;
321
322
    my $start = $self->firstMatchingRule(@startrules);
323
324
    my $rules = $self->topGrammarX(0,$levels, $start);
325
326
    return GenTest::Grammar->new(grammar_rules => $rules);
327
}
328
329
##
330
## Produce a new grammar keeping a masked set of rules. The mask is 16
331
## bits. If the mask is too short, we use the original mask as a seed
332
## for a random number generator and generate more 16-bit values as
333
## needed. The mask is applied in alphapetical order on the rules to
334
## ensure a deterministicresult since I don't trust the Perl %hashes
335
## to be always ordered the same twhen they are produced e.g. from
336
## topGrammar or whatever...
337
##
338
339
340
sub mask {
341
    my ($self, $mask) = @_;
342
343
344
    my $rules = $self->rules();
345
346
    my %newRuleset;
347
348
    my $i = 0;
0.73.2 by Bernt M. Johnsen
Let --mask be a seed to the masking process instead of the first 16 bits of the maska
349
    my $prng = GenTest::Random->new(seed => $mask);
350
    ## Generate the first 16 bits.
351
    my $mask16 = $prng->uint16(0,0x7fff);
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
352
    foreach my $rulename (sort keys %$rules) {
353
        my $rule = $self->rule($rulename);
354
        my @newComponents;
0.67.89 by Bernt M. Johnsen
Some code simplifications (and hopefully speedups)
355
        foreach my $x (@{$rule->components()}) {
0.73.2 by Bernt M. Johnsen
Let --mask be a seed to the masking process instead of the first 16 bits of the maska
356
            push @newComponents, $x if (1 << ($i++)) & $mask16;
0.67.19 by Bernt M. Johnsen
New grammar masking and patching (not activated yet)
357
            if ($i % 16 == 0) {
358
                # We need more bits!
359
                $i = 0;
360
                $mask = $prng->uint16(0,0x7fff);
361
            }
362
        }
363
        
364
        my $newRule;
365
366
        ## If no components were chosen, we chose all to have a working
367
        ## grammar.
368
        if ($#newComponents < 0) {
369
            $newRule = $rule;
370
        } else {
371
            $newRule= GenTest::Grammar::Rule->new(name => $rulename,
372
                                              components => \@newComponents);
373
        }
374
        $newRuleset{$rulename}= $newRule;
375
        
376
    }
377
378
    return GenTest::Grammar->new(grammar_rules => \%newRuleset);
379
}
380
0.67.1 by Philip Stoev
initial import from internal tree
381
1;