~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
# -*- cperl -*-
2
# Copyright (C) 2004-2006 MySQL AB
3
# 
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; version 2 of the License.
7
# 
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
# 
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16
17
# This is a library file used by the Perl version of mysql-test-run,
18
# and is part of the translation of the Bourne shell script with the
19
# same name.
20
21
use Socket;
22
use Errno;
23
use strict;
24
25
use POSIX qw(WNOHANG SIGHUP);
26
27
# Forward declarations.  Every sub in this file that is defined with a
# prototype is declared here, so that a call which appears before the
# definition is still compiled against that prototype.
sub mtr_run ($$$$$$;$);
sub mtr_run_test ($$$$$$;$);
sub mtr_spawn ($$$$$$;$);
sub mtr_check_stop_servers ($);
sub mtr_kill_leftovers ();
sub mtr_wait_blocking ($);
sub mtr_server_shutdown ($);
sub mtr_record_dead_children ();
sub mtr_ndbmgm_start($$);
sub mtr_exit ($);
sub sleep_until_file_created ($$$);
sub mtr_kill_processes ($);
sub mtr_ping_with_timeout($);
sub mtr_ping_port ($);

# Local functions
sub spawn_impl ($$$$$$$);
sub check_expected_crash_and_restart ($);
42
43
##############################################################################
44
#
45
#  Execute an external command
46
#
47
##############################################################################
48
49
# Run an external command in mode 'run'.
#
# Arguments, in order: program path, array ref with the program
# arguments, the STDIN, STDOUT and STDERR redirection targets, a pid
# file (accepted for interface compatibility, never used) and an
# optional hash ref with spawn options.
#
# Returns whatever spawn_impl() returns for mode 'run'.
sub mtr_run ($$$$$$;$) {
  my ($path, $arg_list_t, $input, $output, $error, undef, $spawn_opts)= @_;

  return spawn_impl($path, $arg_list_t, 'run',
                    $input, $output, $error, $spawn_opts);
}
61
62
# Run an external command in mode 'test'.
#
# Takes the same arguments as mtr_run(): program path, array ref with
# the program arguments, STDIN/STDOUT/STDERR redirection targets, an
# unused pid file and an optional hash ref with spawn options.
#
# Returns whatever spawn_impl() returns for mode 'test'.
sub mtr_run_test ($$$$$$;$) {
  my ($path, $arg_list_t, $input, $output, $error, undef, $spawn_opts)= @_;

  return spawn_impl($path, $arg_list_t, 'test',
                    $input, $output, $error, $spawn_opts);
}
74
75
# Start an external command in mode 'spawn'.
#
# Takes the same arguments as mtr_run(): program path, array ref with
# the program arguments, STDIN/STDOUT/STDERR redirection targets, an
# unused pid file and an optional hash ref with spawn options.
#
# Returns whatever spawn_impl() returns for mode 'spawn'.
sub mtr_spawn ($$$$$$;$) {
  my ($path, $arg_list_t, $input, $output, $error, undef, $spawn_opts)= @_;

  return spawn_impl($path, $arg_list_t, 'spawn',
                    $input, $output, $error, $spawn_opts);
}
87
88
89
90
# Fork, and exec "$path" with the arguments in @$arg_list_t in the child.
#
#   $path        program to execute; must be defined
#   $arg_list_t  array ref with the program arguments
#   $mode        handed on to spawn_parent_impl(), which decides how
#                the parent treats the child
#   $input       file to read STDIN from, or false to leave it alone
#   $output      file to send STDOUT to, or false to leave it alone
#   $error       file to send STDERR to, or false to leave it alone
#   $spawn_opts  optional hash ref; a true 'append_log_file' makes the
#                output files be opened with '>>' instead of '>'
#
# In the parent the return value is that of spawn_parent_impl().  The
# child does not return unless exec() fails.
sub spawn_impl ($$$$$$$) {
  my ($path, $arg_list_t, $mode, $input, $output, $error, $spawn_opts)= @_;

  if ( $::opt_script_debug ) {
    my $separator= "-" x 73;

    mtr_report("");
    mtr_debug($separator);
    mtr_debug("STDIN  $input") if $input;
    mtr_debug("STDOUT $output") if $output;
    mtr_debug("STDERR $error") if $error;
    mtr_debug("$mode: $path ", join(" ",@$arg_list_t));
    mtr_debug("spawn options:");
    if ( $spawn_opts ) {
      foreach my $opt_name (sort keys %{$spawn_opts}) {
        mtr_debug("  - $opt_name: $spawn_opts->{$opt_name}");
      }
    }
    else {
      mtr_debug("  none");
    }
    mtr_debug($separator);
    mtr_report("");
  }

  mtr_error("Can't spawn with empty \"path\"") unless defined $path;

 FORK:
  {
    my $pid= fork();

    unless ( defined $pid ) {
      # fork() failed.  EAGAIN is treated as temporary: wait and retry.
      if ( $! == $!{EAGAIN} ) {         # See "perldoc Errno"
        mtr_warning("Got EAGAIN from fork(), sleep 1 second and redo");
        sleep(1);
        redo FORK;
      }

      mtr_error("$path ($pid) can't be forked, error: $!");
    }

    if ( $pid ) {
      # Parent
      select(STDOUT) if $::glob_win32_perl;
      return spawn_parent_impl($pid,$mode,$path);
    }

    # Child: redirect the standard handles and exec

    $SIG{INT}= 'DEFAULT';               # Parent do some stuff, we don't

    my $log_file_open_mode=
      ($spawn_opts and $spawn_opts->{'append_log_file'}) ? '>>' : '>';

    # STDOUT is not redirected on ActiveState perl, since the "child"
    # is just another thread in the same process there.
    if ( $output and ! $::glob_win32_perl ) {
      open(STDOUT,$log_file_open_mode,$output)
        or mtr_child_error("can't redirect STDOUT to \"$output\": $!");
    }

    if ( $error ) {
      if ( !$::glob_win32_perl and $output eq $error ) {
        # Same target as STDOUT: share the already opened handle
        open(STDERR,">&STDOUT")
          or mtr_child_error("can't dup STDOUT: $!");
      }
      else {
        open(STDERR,$log_file_open_mode,$error)
          or mtr_child_error("can't redirect STDERR to \"$error\": $!");
      }
    }

    if ( $input ) {
      open(STDIN,"<",$input)
        or mtr_child_error("can't redirect STDIN to \"$input\": $!");
    }

    exec($path,@$arg_list_t)
      or mtr_child_error("failed to execute \"$path\": $!");

    mtr_error("Should never come here 1!");
    mtr_error("Should never come here 2!");
  }
  mtr_error("Should never come here 3!");
}
210
211
212
# Parent side of spawn_impl(): decide what to do with the child $pid.
#
#   $pid   pid of the process that was just forked
#   $mode  'run', 'test', or anything else (treated as "don't wait")
#   $path  program path, only used in messages
#
# Returns
#   - for 'run':  the shell style exit status of the child, or 1 when
#                 waitpid() returned -1
#   - for 'test': 63 when the "testcase" timer fired, otherwise the
#                 shell style exit status of the child (-1 if it was
#                 never collected)
#   - otherwise:  $pid, without waiting
sub spawn_parent_impl {
  my ($pid, $mode, $path)= @_;

  # We spawned a process we don't wait for
  return $pid unless ( $mode eq 'run' or $mode eq 'test' );

  if ( $mode eq 'run' ) {
    # Simple run of command, wait blocking for it to return
    my $ret_pid= waitpid($pid,0);
    if ( $ret_pid != $pid ) {
      # The "simple" waitpid has failed, print debug info
      # and try to handle the error
      mtr_warning("waitpid($pid, 0) returned $ret_pid " .
                  "when waiting for '$path', error: '$!'");

      # -1 would indicate the process no longer exists, so that
      # waitpid couldn't wait for it.
      return 1 if $ret_pid == -1;

      mtr_error("Error handling failed");
    }

    return mtr_process_exit_status($?);
  }

  # Mode 'test': we run mysqltest and wait for it to return, doing a
  # blocking waitpid() on any child until we get the one for
  # "mysqltest".  Timers and other children that terminate in the
  # meantime are handled as they show up.

  my $exit_value= -1;
  my $saved_exit_value;
  my $ret_pid;                          # What waitpid() returns

  while ( ($ret_pid= waitpid(-1,0)) != -1 ) {
    # Someone terminated, we don't know who yet.  Ask first whether it
    # was one of the timers.
    my $timer_name= mtr_timer_timeout($::glob_timers, $ret_pid);
    if ( $timer_name ) {
      if ( $timer_name eq "suite" ) {
        # We give up here
        # FIXME we should only give up the suite, not all of the run?
        print STDERR "\n";
        mtr_error("Test suite timeout");
      }
      elsif ( $timer_name eq "testcase" ) {
        $saved_exit_value=  63;         # Mark as timeout
        kill(9, $pid);                  # Kill mysqltest
        next;                           # Go on and catch the termination
      }
    }

    if ( $ret_pid == $pid ) {
      # We got termination of mysqltest, we are done
      $exit_value= mtr_process_exit_status($?);
      last;
    }

    # Some other child process died; restart it if that was expected
    check_expected_crash_and_restart($ret_pid);
  }

  if ( $ret_pid != $pid ) {
    # The loop ended without collecting mysqltest.
    # Kill the mysqltest process.
    mtr_verbose("Kill mysqltest because another process died");
    kill(9,$pid);

    $ret_pid= waitpid($pid,0);

    mtr_error("$path ($pid) got lost somehow") if $ret_pid != $pid;
  }

  return $saved_exit_value || $exit_value;
}
315
316
317
# ----------------------------------------------------------------------
318
# We try to emulate how an Unix shell calculates the exit code
319
# ----------------------------------------------------------------------
320
321
# Convert a raw wait status (as found in $?) to a shell style exit code.
#
# If the low seven bits are non-zero they are taken as a signal number
# and 128 + that number is returned; otherwise the status shifted
# right by eight bits is returned.
sub mtr_process_exit_status {
  my ($raw_status)= @_;

  my $signal_num= $raw_status & 127;

  return $signal_num
    ? $signal_num + 128                 # Signal num + 128
    : $raw_status >> 8;                 # Exit code
}
333
334
335
##############################################################################
336
#
337
#  Kill processes left from previous runs
338
#
339
##############################################################################
340
341
342
# Shut down, and if needed kill, the servers left over from earlier runs.
#
# Steps:
#   1. For every entry in @$::master and @$::slave, start a shutdown
#      client with mtr_server_shutdown() and forget the server's pid.
#   2. Wait for all shutdown clients, then wait (with timeout) until the
#      servers no longer answer on their ports.
#   3. Read every "*.pid" file in "$::opt_vardir/run" and send SIGKILL
#      to each pid found that still exists.
#   4. Warn about every known port that still answers.
#
# Takes no arguments and has no meaningful return value.
sub mtr_kill_leftovers () {

  mtr_report("Killing Possible Leftover Processes");
  mtr_debug("mtr_kill_leftovers(): started.");

  my @kill_pids;
  my %admin_pids;

  foreach my $srv (@{$::master}, @{$::slave})
  {
    mtr_debug("  - mysqld " .
              "(pid: $srv->{pid}; " .
              "pid file: '$srv->{path_pid}'; " .
              "socket: '$srv->{path_sock}'; ".
              "port: $srv->{port})");

    my $pid= mtr_server_shutdown($srv);

    # Save the pid of the drizzle client process
    $admin_pids{$pid}= 1;

    push(@kill_pids,{
                     pid      => $srv->{'pid'},
                     pidfile  => $srv->{'path_pid'},
                     sockfile => $srv->{'path_sock'},
                     port     => $srv->{'port'},
                    });
    $srv->{'pid'}= 0; # Assume we are done with it
  }

  # Wait for all the admin processes to complete
  mtr_wait_blocking(\%admin_pids);

  # We don't trust the shutdown client to have waited long enough
  # (FIXME should be debugged), so ping the servers again and wait,
  # with a timeout, until none of them answers.

  mtr_ping_with_timeout(\@kill_pids);

  # We now have tried to terminate nice. We have waited for the listen
  # port to be free, but can't really tell if the mysqld process died
  # or not. We now try to find the process PID from the PID file, and
  # send a kill to that process. Note that Perl lets kill(0,@pids) be
  # a way to just return the number of processes the kernel can send
  # signals to. So this can be used (except on Cygwin) to determine
  # whether there are processes left running.
  #
  # But still after all this work, all we know is that we have
  # the ports free.

  # We scan the "var/run/" directory for other process id's to kill

  my $rundir= "$::opt_vardir/run";

  mtr_debug("Processing PID files in directory '$rundir'...");

  if ( -d $rundir )
  {
    # Lexical handle, so that no global RUNDIR handle is left around
    opendir(my $rundir_dh, $rundir)
      or mtr_error("can't open directory \"$rundir\": $!");

    my @pids;

    while ( my $elem= readdir($rundir_dh) )
    {
      # Only read pid from files that end with .pid
      if ( $elem =~ /.*[.]pid$/)
      {
        my $pidfile= "$rundir/$elem";

        if ( -f $pidfile )
        {
          mtr_debug("Processing PID file: '$pidfile'...");

          my $pid= mtr_get_pid_from_file($pidfile);

          mtr_debug("Got pid: $pid from file '$pidfile'");

          # Anything that is not a positive integer would be taken as
          # pid 0 by kill(), which on POSIX systems means "our own
          # process group".  Never pass such a value on.
          unless ( defined $pid and $pid =~ /^\s*\d+\s*$/ and $pid > 0 )
          {
            mtr_warning("Ignoring invalid pid '" .
                        (defined $pid ? $pid : '') .
                        "' found in '$pidfile'");
            next;
          }

          if ( $::glob_cygwin_perl or kill(0, $pid) )
          {
            mtr_debug("There is process with pid $pid -- scheduling for kill.");
            push(@pids, $pid);            # We know (cygwin guess) it exists
          }
          else
          {
            mtr_debug("There is no process with pid $pid -- skipping.");
          }
        }
      }
      else
      {
        mtr_warning("Found non pid file $elem in $rundir")
          if -f "$rundir/$elem";
        next;
      }
    }
    closedir($rundir_dh);

    if ( @pids )
    {
      mtr_debug("Killing the following processes with PID files: " .
                join(' ', @pids) . "...");

      start_reap_all();

      if ( $::glob_cygwin_perl )
      {
        # We have no (easy) way of knowing the Cygwin controlling
        # process, in the PID file we only have the Windows process id.
        system("kill -f " . join(" ",@pids)); # Hope for the best....
        mtr_debug("Sleep 5 seconds waiting for processes to die");
        sleep(5);
      }
      else
      {
        my $retries= 10;                    # 10 seconds
        do
        {
          mtr_debug("Sending SIGKILL to pids: " . join(' ', @pids));
          kill(9, @pids);
          mtr_report("Sleep 1 second waiting for processes to die");
          sleep(1)                      # Wait one second
        } while ( $retries-- and  kill(0, @pids) );

        if ( kill(0, @pids) )           # Check if some left
        {
          mtr_warning("can't kill process(es) " . join(" ", @pids));
        }
      }

      stop_reap_all();
    }
  }
  else
  {
    mtr_debug("Directory for PID files ($rundir) does not exist.");
  }

  # We may have failed everything, but we now check again if we have
  # the listen ports free to use, and if they are free, just go for it.

  mtr_debug("Checking known mysqld servers...");

  foreach my $srv ( @kill_pids )
  {
    if ( defined $srv->{'port'} and mtr_ping_port($srv->{'port'}) )
    {
      mtr_warning("can't kill old process holding port $srv->{'port'}");
    }
  }

  mtr_debug("mtr_kill_leftovers(): finished.");
}
500
501
502
#
503
# Check that all processes in "spec" are shut down gracefully,
# else kill them off hard.
#
# $spec is an array ref of hash refs.  The keys read here are 'pid',
# 'real_pid' (only on win32 perl), 'pidfile', 'sockfile' and 'errfile';
# mtr_ping_with_timeout() additionally reads 'port'.  On return the
# 'pid' of every entry that was waited for or killed is 0.
#
# Returns nothing.  Calls mtr_error() if files of a killed server
# could not be removed and one of those servers still answers on its
# port.
#
sub mtr_check_stop_servers ($) {
  my $spec=  shift;

  # Return if no processes are defined
  return if ! @$spec;

  mtr_verbose("mtr_check_stop_servers");

  # ----------------------------------------------------------------------
  # Wait until servers in "spec" has stopped listening
  # to their ports or timeout occurs
  # ----------------------------------------------------------------------
  mtr_ping_with_timeout($spec);

  # ----------------------------------------------------------------------
  # Use waitpid() nonblocking for a little while, to see how
  # many processes will exit successfully.
  # This is the normal case.
  # ----------------------------------------------------------------------
  my $wait_counter= 100; # Max number of times to redo the loop
  foreach my $srv ( @$spec )
  {
    my $pid= $srv->{'pid'};
    my $ret_pid;
    if ( $pid )
    {
      $ret_pid= waitpid($pid,&WNOHANG);
      if ($ret_pid == $pid)
      {
        mtr_verbose("Caught exit of process $ret_pid");
        $srv->{'pid'}= 0;
      }
      elsif ($ret_pid == 0)
      {
        mtr_verbose("Process $pid is still alive");
        if ($wait_counter-- > 0)
        {
          # Give the processes more time to exit; the counter is
          # shared by all servers in the list
          select(undef, undef, undef, (0.1));
          redo;
        }
      }
      else
      {
        mtr_warning("caught exit of unknown child $ret_pid");
      }
    }
  }

  # ----------------------------------------------------------------------
  # The processes that haven't yet exited need to
  # be killed hard, put them in "kill_pids" hash
  # ----------------------------------------------------------------------
  my %kill_pids;
  foreach my $srv ( @$spec )
  {
    my $pid= $srv->{'pid'};
    if ( $pid )
    {
      # Server is still alive, put it in list to be hard killed
      if ($::glob_win32_perl)
      {
        # Kill the real process if it's known
        $pid= $srv->{'real_pid'} if ($srv->{'real_pid'});
      }
      $kill_pids{$pid}= 1;

      # Write a message to the process's error log (if it has one)
      # that it's being killed hard.
      if ( defined $srv->{'errfile'} )
      {
        mtr_tofile($srv->{'errfile'}, "Note: Forcing kill of process $pid\n");
      }
      mtr_warning("Forcing kill of process $pid");

    }
    else
    {
      # Server is dead, remove the pidfile if it exists
      #
      # Race, could have been removed between test with -f
      # and the unlink() below, so better check again with -f
      if ( -f $srv->{'pidfile'} and ! unlink($srv->{'pidfile'}) and
           -f $srv->{'pidfile'} )
      {
        mtr_error("can't remove $srv->{'pidfile'}");
      }
    }
  }

  if ( ! keys %kill_pids )
  {
    # All processes has exited gracefully
    return;
  }

  mtr_kill_processes(\%kill_pids);

  # ----------------------------------------------------------------------
  # All processes are killed, cleanup leftover files
  # ----------------------------------------------------------------------
  {
    my $errors= 0;
    my @killed;                         # Entries that were hard killed

    foreach my $srv ( @$spec )
    {
      if ( $srv->{'pid'} )
      {
        push(@killed, $srv);

        # Server has been hard killed, clean its resources
        foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'})
        {
          # Know it is dead so should be no race, careful anyway
          if ( defined $file and -f $file and ! unlink($file) and -f $file )
          {
            $errors++;
            mtr_warning("couldn't delete $file");
          }
        }

        if ($::glob_win32_perl and $srv->{'real_pid'})
        {
          # Wait for the pseudo pid - if the real_pid was known
          # the pseudo pid has not been waited for yet, wait blocking
          # since it's "such a simple program"
          mtr_verbose("Wait for pseudo process $srv->{'pid'}");
          my $ret_pid= waitpid($srv->{'pid'}, 0);
          mtr_verbose("Pseudo process $ret_pid died");
        }
      }
    }

    if ( $errors )
    {
      # There were errors cleaning up after the killed processes;
      # do one last attempt to ping the servers
      # and if they can't be pinged, assume they are dead.
      #
      # This has to happen while the killed entries still carry their
      # pid: mtr_ping_with_timeout() skips every entry whose 'pid' is
      # false, so pinging after the pids were reset checks nothing.
      if ( ! mtr_ping_with_timeout($spec) )
      {
        mtr_error("we could not kill or clean up all processes");
      }
      else
      {
        mtr_verbose("All ports were free, continuing");
      }
    }

    # Now the killed servers can be marked as gone
    foreach my $srv ( @killed )
    {
      $srv->{'pid'}= 0;
    }
  }
}
653
654
655
# Wait, blocking, for every process in the list to terminate.
#
# $admin_pids is a hash ref whose keys are the pids to wait for; the
# values are not looked at.  Returns nothing.
sub mtr_wait_blocking($) {
  my ($admin_pids)= @_;

  # Return if no processes defined
  return unless %$admin_pids;

  mtr_verbose("mtr_wait_blocking");

  # The processes are trusted to terminate by themselves, so we simply
  # wait for each of them in turn before we go on.
  waitpid($_, 0) foreach keys %{$admin_pids};

  return;
}
674
973.1.3 by Toru Maesaka
Remove drizzleadmin from the repository and fix the test suite for it.
675
# Start the client program $::exe_drizzle with "--shutdown" against
# the server described by $srv, without waiting for it to finish.
#
# $srv is a hash ref; 'path_sock' is passed as --socket if that file
# exists, and 'port' is passed as --port if it is true.
#
# Returns the value of mtr_spawn(), which is used as the pid of the
# client.
sub mtr_server_shutdown($) {
  my ($srv)= @_;

  my $args;
  mtr_init_args(\$args);

  mtr_add_arg($args, "--shutdown");
  mtr_add_arg($args, "--user=%s", $::opt_user);
  mtr_add_arg($args, "--password=");
  mtr_add_arg($args, "--silent");

  mtr_add_arg($args, "--socket=%s", $srv->{'path_sock'})
    if -e $srv->{'path_sock'};

  mtr_add_arg($args, "--port=%s", $srv->{'port'})
    if $srv->{'port'};

  mtr_add_arg($args, "--connect_timeout=5");

  # No redirection of STDIN, STDOUT or STDERR
  my $pid= mtr_spawn($::exe_drizzle, $args,
                     "", "", "", "", { append_log_file => 1 });
  mtr_verbose("mtr_server_shutdown, pid: $pid");

  return $pid;
}
702
703
# Start "ndb_mgm" ($::exe_ndb_mgm) for a specific cluster and let it
# execute $command (passed with "-e"), without waiting for it.
#
# $cluster is a hash ref; its 'connect_string' is passed as
# --ndb_connectstring.  STDOUT and STDERR of the program go to
# /dev/null.
#
# Returns the value of mtr_spawn(), which is used as the pid.
sub mtr_ndbmgm_start($$) {
  my ($cluster, $command)= @_;

  my $args;
  mtr_init_args(\$args);

  mtr_add_arg($args, "--no-defaults");
  mtr_add_arg($args, "--core");
  mtr_add_arg($args, "--try-reconnect=1");
  mtr_add_arg($args, "--ndb_connectstring=%s", $cluster->{'connect_string'});
  mtr_add_arg($args, "-e");
  mtr_add_arg($args, "$command");

  my $pid= mtr_spawn($::exe_ndb_mgm, $args,
                     "", "/dev/null", "/dev/null", "", {});
  mtr_verbose("mtr_ndbmgm_start, pid: $pid");

  return $pid;
}
727
728
729
# Ping all servers in the list until none of them answers or until
# the attempts are used up.
#
# $spec is an array ref of hash refs.  Only entries with a true 'pid'
# and a defined 'port' are pinged.  There are at most 200 rounds, and
# after each answered ping there is a 0.1 second sleep.
#
# Returns 1 if a complete round found no server answering (also when
# there was nothing to ping), 0 if a server still answered in the
# last round.
sub mtr_ping_with_timeout($) {
  my ($spec)= @_;

  my $rounds_left= 200;                 # 20 seconds max
  my $all_down= 1;                      # If we just fall through, we are
                                        # done in the sense that the servers
                                        # don't listen to their ports any
                                        # longer

  mtr_debug("Waiting for mysqld servers to stop...");

 ROUND:
  while ( $rounds_left-- ) {
    foreach my $srv ( @$spec ) {
      $all_down= 1;                     # We are optimistic

      next unless ( $srv->{'pid'} and defined $srv->{'port'} );
      next unless mtr_ping_port($srv->{'port'});

      mtr_verbose("waiting for process $srv->{'pid'} to stop ".
                  "using port $srv->{'port'}");

      # Sub-second sleep emulated with select
      select(undef, undef, undef, (0.1));
      $all_down= 0;
      next ROUND;                       # Start over with the first server
    }
    last;                               # Nobody answered, we are done
  }

  if ( $all_down ) {
    mtr_debug("mtr_ping_with_timeout(): All mysqld instances are down.");
  }
  else {
    mtr_report("mtr_ping_with_timeout(): At least one server is alive.");
  }

  return $all_down;
}
778
779
780
#
781
# Look through our lists of processes for an entry with the given pid
# and set the pid of that entry to 0.
#
# Searched, in this order: @$::master and @$::slave, then for every
# cluster in @$::clusters the cluster itself followed by its 'ndbds'.
# Only the first match is changed.  A warning is printed if there is
# no match.
#
sub mark_process_dead($)
{
  my ($ret_pid)= @_;

  foreach my $mysqld (@{$::master}, @{$::slave}) {
    next unless $mysqld->{'pid'} eq $ret_pid;

    mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
    $mysqld->{'pid'}= 0;
    return;
  }

  foreach my $cluster (@{$::clusters}) {
    if ( $cluster->{'pid'} eq $ret_pid ) {
      mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid");
      $cluster->{'pid'}= 0;
      return;
    }

    foreach my $ndbd (@{$cluster->{'ndbds'}}) {
      next unless $ndbd->{'pid'} eq $ret_pid;

      mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid");
      $ndbd->{'pid'}= 0;
      return;
    }
  }

  mtr_warning("mark_process_dead couldn't find an entry for pid: $ret_pid");
}
821
822
#
823
# Look through our lists of processes for an entry with the given pid.
# If one is found, set its pid to 0, and if the matching ".expect"
# file exists in "$::opt_vardir/tmp", start the process again and
# remove that file.
#
# Searched, in this order: @$::master and @$::slave, then for every
# cluster in @$::clusters the cluster itself followed by its 'ndbds'.
# A pid equal to $::instance_manager->{'spawner_pid'} is silently
# accepted; for any other unknown pid a warning is printed.
#
sub check_expected_crash_and_restart($)
{
  my ($ret_pid)= @_;

  my $expect_dir= $::opt_vardir . "/tmp/";

  foreach my $mysqld (@{$::master}, @{$::slave}) {
    next unless $mysqld->{'pid'} eq $ret_pid;

    mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
    $mysqld->{'pid'}= 0;

    # Check if crash expected and restart if it was
    my $expect_file=
      $expect_dir . $mysqld->{'type'} . $mysqld->{'idx'} . ".expect";
    if ( -f $expect_file ) {
      mtr_verbose("Crash was expected, file $expect_file exists");
      mysqld_start($mysqld, $mysqld->{'start_opts'},
                   $mysqld->{'start_slave_master_info'});
      unlink($expect_file);
    }

    return;
  }

  foreach my $cluster (@{$::clusters}) {
    if ( $cluster->{'pid'} eq $ret_pid ) {
      mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid");
      $cluster->{'pid'}= 0;

      # Check if crash expected and restart if it was
      my $expect_file=
        $expect_dir . "ndb_mgmd_" . $cluster->{'type'} . ".expect";
      if ( -f $expect_file ) {
        mtr_verbose("Crash was expected, file $expect_file exists");
        ndbmgmd_start($cluster);
        unlink($expect_file);
      }
      return;
    }

    foreach my $ndbd (@{$cluster->{'ndbds'}}) {
      next unless $ndbd->{'pid'} eq $ret_pid;

      mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid");
      $ndbd->{'pid'}= 0;

      # Check if crash expected and restart if it was
      my $expect_file=
        $expect_dir . "ndbd_" . $cluster->{'type'} . $ndbd->{'idx'} .
        ".expect";
      if ( -f $expect_file ) {
        mtr_verbose("Crash was expected, file $expect_file exists");
        ndbd_start($cluster, $ndbd->{'idx'},
                   $ndbd->{'start_extra_args'});
        unlink($expect_file);
      }
      return;
    }
  }

  return if $::instance_manager->{'spawner_pid'} eq $ret_pid;

  mtr_warning("check_expected_crash_and_restart couldn't find an entry for pid: $ret_pid");
}
902
903
##############################################################################
904
#
905
#  The operating system will keep information about dead children, 
906
#  we read this information here, and if we have records the process
907
#  is alive, we mark it as dead.
908
#
909
##############################################################################
910
911
# Collect, without blocking, every child process that has terminated
# and mark each of them as dead with mark_process_dead().
#
# Returns 1 if at least one child was collected, otherwise 0.
sub mtr_record_dead_children () {

  my $process_died= 0;

  while ( 1 ) {
    my $ret_pid= waitpid(-1,&WNOHANG);

    # -1 or 0 means there are no more processes to wait for
    last if $ret_pid == 0 or $ret_pid == -1;

    mtr_warning("mtr_record_dead_children: $ret_pid");
    mark_process_dead($ret_pid);
    $process_died= 1;
  }

  return $process_died;
}
926
927
# Set the SIGCHLD handler to 'IGNORE', then collect the children that
# have terminated already and mark them as dead.
sub start_reap_all {
  # This causes terminating processes to not become zombies, avoiding
  # the need for (or possibility of) explicit waitpid().
  $SIG{CHLD}= 'IGNORE';

  # On some platforms (Linux, QNX, OSX, ...) there is potential race
  # here. If a process terminated before setting $SIG{CHLD} (but after
  # any attempt to waitpid() it), it will still be a zombie. So we
  # have to handle any such process here.
  while ( 1 ) {
    my $pid= waitpid(-1, &WNOHANG);
    last if $pid == 0 or $pid == -1;

    mtr_warning("start_reap_all pid: $pid");
    mark_process_dead($pid);
  }
}
943
944
# Undo start_reap_all(): set the SIGCHLD handler back to 'DEFAULT'.
sub stop_reap_all {
  return $SIG{CHLD}= 'DEFAULT';
}
947
948
949
# Check whether something accepts TCP connections on "localhost" at
# the given port.
#
#   $port  TCP port number to connect to
#
# Returns 1 if the connect succeeded ("USED"), otherwise 0 ("FREE").
# Calls mtr_error() if "localhost" can't be resolved or the socket
# can't be created.
sub mtr_ping_port ($) {
  my $port= shift;

  mtr_verbose("mtr_ping_port: $port");

  my $remote= "localhost";
  my $iaddr=  inet_aton($remote);
  if ( ! $iaddr )
  {
    mtr_error("can't find IP number for $remote");
  }
  my $paddr=  sockaddr_in($port, $iaddr);
  my $proto=  getprotobyname('tcp');

  # A lexical handle instead of the global SOCK, closed below no
  # matter how connect() turns out.
  my $sock;
  if ( ! socket($sock, PF_INET, SOCK_STREAM, $proto) )
  {
    mtr_error("can't create socket: $!");
  }

  mtr_debug("Pinging server (port: $port)...");

  my $in_use= connect($sock, $paddr) ? 1 : 0;

  close($sock);                         # FIXME check error?

  mtr_verbose($in_use ? "USED" : "FREE");
  return $in_use;
}
981
982
##############################################################################
983
#
984
#  Wait for a file to be created
985
#
986
##############################################################################
987
988
# FIXME check that the pidfile contains the expected pid!
989
990
# Wait until a file becomes readable.
#
#   $pidfile  path of the file to wait for
#   $timeout  maximum time to wait, in seconds
#   $pid      pid of the process expected to create the file, or 0 to
#             skip the check for a terminated process
#
# The file is checked every 100 milliseconds.  Returns 1 as soon as it
# is readable.  Returns 0 if the process $pid terminated first, or if
# the timeout ran out.
sub sleep_until_file_created ($$$) {
  my ($pidfile, $timeout, $pid)= @_;

  my $sleeptime= 100; # Milliseconds
  my $loops= ($timeout * 1000) / $sleeptime;

  for ( my $loop= 1; $loop <= $loops; $loop++ ) {
    return 1 if -r $pidfile;

    # Check if it died after the fork() was successful
    if ( $pid != 0 && waitpid($pid,&WNOHANG) == $pid ) {
      mtr_warning("Process $pid died");
      return 0;
    }

    mtr_debug("Sleep $sleeptime milliseconds waiting for $pidfile");

    # Print extra message every 60 seconds
    my $seconds= ($loop * $sleeptime) / 1000;
    if ( $seconds > 1 and int($seconds * 10) % 600 == 0 ) {
      my $left= $timeout - $seconds;
      mtr_warning("Waited $seconds seconds for $pidfile to be created, " .
                  "still waiting for $left seconds...");
    }

    # Sub-second sleep emulated with select
    select(undef, undef, undef, ($sleeptime/1000));
  }

  return 0;
}
1028
1029
1030
# Send SIGKILL to every process in the list and wait for each one
# that could be signalled.
#
# $pids is a hash ref whose keys are the pids to kill; the values are
# not looked at.  Keys that are not positive are skipped with a
# warning.
sub mtr_kill_processes ($) {
  my ($pids)= @_;

  mtr_verbose("mtr_kill_processes (" . join(" ", keys %{$pids}) . ")");

  foreach my $pid (keys %{$pids}) {

    if ( $pid <= 0 ) {
      mtr_warning("Trying to kill illegal pid: $pid");
      next;
    }

    unless ( kill(9, $pid) ) {
      # Nothing could be signalled, assume it's killed
      mtr_verbose("killed $pid(no such process)");
      next;
    }

    # The signal was delivered, collect the process
    my $ret_pid= waitpid($pid,0);
    if ( $ret_pid == $pid ) {
      mtr_verbose("killed $pid(got the pid)");
    }
    elsif ( $ret_pid == -1 ) {
      mtr_verbose("killed $pid(got -1)");
    }
  }

  mtr_verbose("done killing processes");
}
1065
1066
1067
##############################################################################
1068
#
1069
#  When we exit, we kill off all children
1070
#
1071
##############################################################################
1072
1073
# Stop all timers, send SIGHUP to our own process group (if we are its
# leader, and not on win32 perl) and exit.
#
#   $code  exit code of this process
#
# Does not return.
sub mtr_exit ($) {
  my $code= shift;
  mtr_timer_stop_all($::glob_timers);

  # We are part of the group that gets the signal, so ignore it here
  local $SIG{HUP} = 'IGNORE';

  # ToDo: Signalling -$$ will only work if we are the process group
  # leader (in fact on QNX it will signal our session group leader,
  # which might be Do-compile or Pushbuild, causing tests to be
  # aborted). So we only do it if we are the group leader. We might
  # set ourselves as the group leader at startup (with
  # POSIX::setpgrp(0,0)), but then care must be needed to always do
  # proper child process cleanup.
  #
  # The builtin kill() is used on purpose.  POSIX::kill() expects its
  # arguments as (pid, signal), so calling it as (SIGHUP, -$$) passes
  # them the wrong way round and the process group never gets SIGHUP.
  kill('HUP', -$$) if !$::glob_win32_perl and $$ == getpgrp();

  exit($code);
}
1088
1089
###########################################################################
1090
1091
1;