~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
# -*- cperl -*-
2
# Copyright (C) 2004-2006 MySQL AB
3
# 
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; version 2 of the License.
7
# 
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
# 
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16
17
# This is a library file used by the Perl version of mysql-test-run,
18
# and is part of the translation of the Bourne shell script with the
19
# same name.
20
21
use Socket;
22
use Errno;
23
use strict;
24
25
use POSIX qw(WNOHANG SIGHUP);
26
27
sub mtr_run ($$$$$$;$);
28
sub mtr_spawn ($$$$$$;$);
29
sub mtr_check_stop_servers ($);
30
sub mtr_kill_leftovers ();
31
sub mtr_wait_blocking ($);
32
sub mtr_record_dead_children ();
33
sub mtr_ndbmgm_start($$);
34
sub mtr_mysqladmin_start($$$);
35
sub mtr_exit ($);
36
sub sleep_until_file_created ($$$);
37
sub mtr_kill_processes ($);
38
sub mtr_ping_with_timeout($);
39
sub mtr_ping_port ($);
40
41
# Local function
42
sub spawn_impl ($$$$$$$);
43
44
##############################################################################
45
#
46
#  Execute an external command
47
#
48
##############################################################################
49
50
# Run the program "$path" with the arguments in @$arg_list_t in 'run'
# mode: the work is delegated to spawn_impl(), whose parent side waits
# for the child and returns its shell-style exit status.
#
#   $input, $output, $error  files to redirect STDIN/STDOUT/STDERR to
#                            (a false value means no redirect)
#   $pid_file                accepted for call compatibility, not used
#   $spawn_opts              optional hash reference with spawn options
sub mtr_run ($$$$$$;$) {
  my ($path, $arg_list_t, $input, $output, $error, $pid_file, $spawn_opts)=
    @_;

  return spawn_impl($path, $arg_list_t, 'run',
                    $input, $output, $error, $spawn_opts);
}
62
63
# Run the program "$path" with the arguments in @$arg_list_t in 'test'
# mode: the work is delegated to spawn_impl(), whose parent side waits
# for the child while also reaping any other child that terminates.
#
#   $input, $output, $error  files to redirect STDIN/STDOUT/STDERR to
#                            (a false value means no redirect)
#   $pid_file                accepted for call compatibility, not used
#   $spawn_opts              optional hash reference with spawn options
sub mtr_run_test ($$$$$$;$) {
  my ($path, $arg_list_t, $input, $output, $error, $pid_file, $spawn_opts)=
    @_;

  return spawn_impl($path, $arg_list_t, 'test',
                    $input, $output, $error, $spawn_opts);
}
75
76
# Start the program "$path" with the arguments in @$arg_list_t in
# 'spawn' mode: the work is delegated to spawn_impl(), whose parent
# side returns the pid of the child without waiting for it.
#
#   $input, $output, $error  files to redirect STDIN/STDOUT/STDERR to
#                            (a false value means no redirect)
#   $pid_file                accepted for call compatibility, not used
#   $spawn_opts              optional hash reference with spawn options
sub mtr_spawn ($$$$$$;$) {
  my ($path, $arg_list_t, $input, $output, $error, $pid_file, $spawn_opts)=
    @_;

  return spawn_impl($path, $arg_list_t, 'spawn',
                    $input, $output, $error, $spawn_opts);
}
88
89
90
91
# Fork a child that executes "$path" with the arguments in @$arg_list_t
# and hand the parent side over to spawn_parent_impl().
#
#   $path        program to execute, must be defined
#   $arg_list_t  reference to the list of arguments
#   $mode        'run', 'test' or 'spawn', passed to spawn_parent_impl()
#   $input       file to redirect STDIN from, false for no redirect
#   $output      file to redirect STDOUT to, false for no redirect
#   $error       file to redirect STDERR to, false for no redirect
#   $spawn_opts  optional hash reference; a true 'append_log_file'
#                makes the output files be opened in append mode
#
# In the parent, returns whatever spawn_parent_impl() returns.
# The child never returns, it either exec()s or reports an error.
sub spawn_impl ($$$$$$$) {
  my ($path, $arg_list_t, $mode, $input, $output, $error, $spawn_opts)= @_;

  if ( $::opt_script_debug )
  {
    mtr_report("");
    mtr_debug("-" x 73);
    mtr_debug("STDIN  $input") if $input;
    mtr_debug("STDOUT $output") if $output;
    mtr_debug("STDERR $error") if $error;
    mtr_debug("$mode: $path ", join(" ",@$arg_list_t));
    mtr_debug("spawn options:");
    if ($spawn_opts)
    {
      foreach my $key (sort keys %{$spawn_opts})
      {
        mtr_debug("  - $key: $spawn_opts->{$key}");
      }
    }
    else
    {
      mtr_debug("  none");
    }
    mtr_debug("-" x 73);
    mtr_report("");
  }

  mtr_error("Can't spawn with empty \"path\"") unless defined $path;

 FORK:
  {
    my $pid= fork();

    if ( ! defined $pid )
    {
      # %! is true for the errno that is currently set, see "perldoc Errno"
      if ( $!{EAGAIN} )
      {
        mtr_warning("Got EAGAIN from fork(), sleep 1 second and redo");
        sleep(1);
        redo FORK;
      }

      # fork() returned undef, so there is no pid to put in the message
      mtr_error("$path can't be forked, error: $!");
    }

    if ( $pid )
    {
      # Parent
      select(STDOUT) if $::glob_win32_perl;
      return spawn_parent_impl($pid,$mode,$path);
    }
    else
    {
      # Child, redirect output and exec

      # The parent may have installed its own handler for SIGINT,
      # the child goes back to the default action
      $SIG{INT}= 'DEFAULT';

      my $log_file_open_mode = '>';

      if ($spawn_opts and $spawn_opts->{'append_log_file'})
      {
        $log_file_open_mode = '>>';
      }

      if ( $output )
      {
        if ( $::glob_win32_perl )
        {
          # Don't redirect stdout on ActiveState perl since this is
          # just another thread in the same process.
        }
        elsif ( ! open(STDOUT,$log_file_open_mode,$output) )
        {
          mtr_child_error("can't redirect STDOUT to \"$output\": $!");
        }
      }

      if ( $error )
      {
        if ( !$::glob_win32_perl and $output eq $error )
        {
          # Same file for both, let STDERR share the STDOUT handle
          if ( ! open(STDERR,">&STDOUT") )
          {
            mtr_child_error("can't dup STDOUT: $!");
          }
        }
        else
        {
          if ( ! open(STDERR,$log_file_open_mode,$error) )
          {
            mtr_child_error("can't redirect STDERR to \"$error\": $!");
          }
        }
      }

      if ( $input )
      {
        if ( ! open(STDIN,"<",$input) )
        {
          mtr_child_error("can't redirect STDIN to \"$input\": $!");
        }
      }

      # exec() only returns if it failed
      if ( ! exec($path,@$arg_list_t) )
      {
        mtr_child_error("failed to execute \"$path\": $!");
      }
      mtr_error("Should never come here 1!");
    }
    mtr_error("Should never come here 2!");
  }
  mtr_error("Should never come here 3!");
}
211
212
213
# Parent side of spawn_impl(), what happens depends on $mode:
#
#   'run'   wait blocking for $pid and return its shell-style exit
#           status, or 1 if waitpid() returned -1
#   'test'  wait blocking for any child until $pid terminates; other
#           children that terminate are passed on to
#           check_expected_crash_and_restart(). Returns the exit status
#           of $pid, or 63 if the "testcase" timer expired
#   other   return $pid at once, the process is not waited for
#
# $path is only used in messages.
sub spawn_parent_impl {
  my ($pid, $mode, $path)= @_;

  # We spawned a process we don't wait for
  return $pid unless $mode eq 'run' or $mode eq 'test';

  if ( $mode eq 'run' )
  {
    # Simple run of command, wait blocking for it to return
    my $waited_pid= waitpid($pid,0);
    if ( $waited_pid != $pid )
    {
      # The "simple" waitpid has failed, print debug info
      # and try to handle the error
      mtr_warning("waitpid($pid, 0) returned $waited_pid " .
                  "when waiting for '$path', error: '$!'");

      # -1 would indicate the process no longer exists and
      # waitpid couldn't wait for it
      return 1 if $waited_pid == -1;

      mtr_error("Error handling failed");
    }

    return mtr_process_exit_status($?);
  }

  # 'test' mode: we run mysqltest and wait for it to return, but we
  # try to catch dying mysqld processes as well.
  #
  # We do blocking waitpid() until we get the return from the
  # "mysqltest" call. A terminated child that is not mysqltest is
  # handed to check_expected_crash_and_restart().

  my $exit_value= -1;
  my $saved_exit_value;
  my $ret_pid;                          # What waitpid() returns

  while ( ($ret_pid= waitpid(-1,0)) != -1 )
  {
    # Someone terminated, don't know who. Ask the timers first,
    # $? is left untouched until we know whether it was mysqltest.
    my $timer_name= mtr_timer_timeout($::glob_timers, $ret_pid);

    if ( $timer_name and $timer_name eq "suite" )
    {
      # We give up here
      # FIXME we should only give up the suite, not all of the run?
      print STDERR "\n";
      mtr_error("Test suite timeout");
    }
    elsif ( $timer_name and $timer_name eq "testcase" )
    {
      $saved_exit_value=  63;           # Mark as timeout
      kill(9, $pid);                    # Kill mysqltest
      next;                             # Go on and catch the termination
    }

    if ( $ret_pid == $pid )
    {
      # We got termination of mysqltest, we are done
      $exit_value= mtr_process_exit_status($?);
      last;
    }

    # One of the other child processes died, restart it
    # if the crash was expected
    check_expected_crash_and_restart($ret_pid);
  }

  if ( $ret_pid != $pid )
  {
    # The loop ended without seeing mysqltest terminate,
    # kill the mysqltest process and collect it.
    mtr_verbose("Kill mysqltest because another process died");
    kill(9,$pid);

    $ret_pid= waitpid($pid,0);

    mtr_error("$path ($pid) got lost somehow") if $ret_pid != $pid;
  }

  return $saved_exit_value || $exit_value;
}
316
317
318
# ----------------------------------------------------------------------
# Emulate how a Unix shell calculates the exit code from a raw wait
# status such as $?.
#
# If the low seven bits are set the process was ended by a signal and
# the result is the signal number + 128, otherwise the result is the
# exit code found in the bits above the low eight.
# ----------------------------------------------------------------------

sub mtr_process_exit_status {
  my ($raw_status)= @_;

  my $signal_num= $raw_status & 127;

  return $signal_num ? $signal_num + 128   # Signal num + 128
                     : $raw_status >> 8;   # Exit code
}
334
335
336
##############################################################################
337
#
338
#  Kill processes left from previous runs
339
#
340
##############################################################################
341
342
343
# Kill all processes (mysqld, ndb_mgmd and ndbd) that would conflict
# with this run.
#
# The steps are:
#   1. start "mysqladmin shutdown" for every master and slave and,
#      unless ndbcluster is skipped, "ndb_mgm shutdown" for every
#      cluster that is not marked 'use_running'
#   2. wait for those admin processes and for the servers to stop
#      listening to their ports
#   3. send SIGKILL to every process that has a .pid file in
#      "$::opt_vardir/run" and still seems to exist
#   4. warn about every known port that is still in use
#
# The 'pid' of every server handled is set to 0.
sub mtr_kill_leftovers () {

  mtr_report("Killing Possible Leftover Processes");
  mtr_debug("mtr_kill_leftovers(): started.");

  my @kill_pids;
  my %admin_pids;

  foreach my $srv (@{$::master}, @{$::slave})
  {
    mtr_debug("  - mysqld " .
              "(pid: $srv->{pid}; " .
              "pid file: '$srv->{path_pid}'; " .
              "socket: '$srv->{path_sock}'; ".
              "port: $srv->{port})");

    my $pid= mtr_mysqladmin_start($srv, "shutdown", 20);

    # Save the pid of the mysqladmin process
    $admin_pids{$pid}= 1;

    push(@kill_pids,{
                     pid      => $srv->{'pid'},
                     pidfile  => $srv->{'path_pid'},
                     sockfile => $srv->{'path_sock'},
                     port     => $srv->{'port'},
                    });
    $srv->{'pid'}= 0; # Assume we are done with it
  }

  if ( ! $::opt_skip_ndbcluster )
  {

    foreach my $cluster (@{$::clusters})
    {

      # Don't shut down a "running" cluster
      next if $cluster->{'use_running'};

      mtr_debug("  - cluster " .
                "(pid: $cluster->{pid}; " .
                "pid file: '$cluster->{path_pid}')");

      my $pid= mtr_ndbmgm_start($cluster, "shutdown");

      # Save the pid of the ndb_mgm process
      $admin_pids{$pid}= 1;

      push(@kill_pids,{
                       pid      => $cluster->{'pid'},
                       pidfile  => $cluster->{'path_pid'}
                      });

      $cluster->{'pid'}= 0; # Assume we are done with it

      foreach my $ndbd (@{$cluster->{'ndbds'}})
      {
        mtr_debug("    - ndbd " .
                  "(pid: $ndbd->{pid}; " .
                  "pid file: '$ndbd->{path_pid}')");

        push(@kill_pids,{
                         pid      => $ndbd->{'pid'},
                         pidfile  => $ndbd->{'path_pid'},
                        });
        $ndbd->{'pid'}= 0; # Assume we are done with it
      }
    }
  }

  # Wait for all the admin processes to complete
  mtr_wait_blocking(\%admin_pids);

  # If we trusted "mysqladmin --shutdown_timeout= ..." we could just
  # terminate now, but we don't (FIXME should be debugged).
  # So we try again to ping and at least wait the same amount of time
  # mysqladmin would for all to die.

  mtr_ping_with_timeout(\@kill_pids);

  # We have now tried to terminate nicely. We have waited for the
  # listen ports to be free, but can't really tell if the mysqld
  # processes died or not. We now try to find the process PIDs from
  # the PID files, and send a kill to those processes. Note that Perl
  # lets kill(0,@pids) be a way to just return the number of processes
  # the kernel can send signals to. So this can be used (except on
  # Cygwin) to determine if the processes we found out might exist
  # are still running.
  #
  # But still after all this work, all we know is that we have
  # the ports free.

  # We scan the "var/run/" directory for other process id's to kill

  my $rundir= "$::opt_vardir/run";

  mtr_debug("Processing PID files in directory '$rundir'...");

  if ( -d $rundir )
  {
    opendir(RUNDIR, $rundir)
      or mtr_error("can't open directory \"$rundir\": $!");

    my @pids;

    # Test with defined(), an entry named "0" must not end the scan
    while ( defined(my $elem= readdir(RUNDIR)) )
    {
      # Only read pid from files that end with .pid
      if ( $elem =~ /.*[.]pid$/)
      {
        my $pidfile= "$rundir/$elem";

        if ( -f $pidfile )
        {
          mtr_debug("Processing PID file: '$pidfile'...");

          my $pid= mtr_get_pid_from_file($pidfile);

          mtr_debug("Got pid: $pid from file '$pidfile'");

          if ( ! $pid or $pid <= 0 )
          {
            # kill() with pid 0 or a negative pid signals whole
            # process groups, which could include this script
            mtr_warning("Ignoring illegal pid '$pid' from file '$pidfile'");
          }
          elsif ( $::glob_cygwin_perl or kill(0, $pid) )
          {
            mtr_debug("There is process with pid $pid -- scheduling for kill.");
            push(@pids, $pid);            # We know (cygwin guess) it exists
          }
          else
          {
            mtr_debug("There is no process with pid $pid -- skipping.");
          }
        }
      }
      else
      {
        mtr_warning("Found non pid file $elem in $rundir")
          if -f "$rundir/$elem";
        next;
      }
    }
    closedir(RUNDIR);

    if ( @pids )
    {
      mtr_debug("Killing the following processes with PID files: " .
                join(' ', @pids) . "...");

      start_reap_all();

      if ( $::glob_cygwin_perl )
      {
        # We have no (easy) way of knowing the Cygwin controlling
        # process, in the PID file we only have the Windows process id.
        # List form, the pids come from files and must never be
        # interpreted by a shell.
        system("kill", "-f", @pids);     # Hope for the best....
        mtr_debug("Sleep 5 seconds waiting for processes to die");
        sleep(5);
      }
      else
      {
        my $retries= 10;                    # 10 seconds
        do
        {
          mtr_debug("Sending SIGKILL to pids: " . join(' ', @pids));
          kill(9, @pids);
          mtr_report("Sleep 1 second waiting for processes to die");
          sleep(1)                      # Wait one second
        } while ( $retries-- and  kill(0, @pids) );

        if ( kill(0, @pids) )           # Check if some left
        {
          mtr_warning("can't kill process(es) " . join(" ", @pids));
        }
      }

      stop_reap_all();
    }
  }
  else
  {
    mtr_debug("Directory for PID files ($rundir) does not exist.");
  }

  # We may have failed everything, but we now check again if we have
  # the listen ports free to use, and if they are free, just go for it.

  mtr_debug("Checking known mysqld servers...");

  foreach my $srv ( @kill_pids )
  {
    # Only the mysqld entries carry a port
    if ( defined $srv->{'port'} and mtr_ping_port($srv->{'port'}) )
    {
      mtr_warning("can't kill old process holding port $srv->{'port'}");
    }
  }

  mtr_debug("mtr_kill_leftovers(): finished.");
}
541
542
543
#
# Check that all processes in "spec" have shut down gracefully,
# else kill them off hard.
#
# $spec is a reference to a list of hashes, the keys read here are
# 'pid', 'real_pid', 'pidfile', 'sockfile' and 'errfile'. The 'pid' of
# every process found dead or killed is set to 0.
#
sub mtr_check_stop_servers ($) {
  my ($spec)= @_;

  # Nothing to do if no processes are defined
  return unless @$spec;

  mtr_verbose("mtr_check_stop_servers");

  # ----------------------------------------------------------------------
  # Wait until the servers in "spec" have stopped listening
  # to their ports or a timeout occurs
  # ----------------------------------------------------------------------
  mtr_ping_with_timeout($spec);

  # ----------------------------------------------------------------------
  # Use waitpid() nonblocking for a little while, to see how
  # many processes will exit successfully.
  # This is the normal case.
  # ----------------------------------------------------------------------
  my $wait_counter= 50; # Max number of times to redo the loop
  foreach my $srv ( @$spec )
  {
    my $pid= $srv->{'pid'};
    next unless $pid;

    my $ret_pid= waitpid($pid,&WNOHANG);
    if ( $ret_pid == $pid )
    {
      mtr_verbose("Caught exit of process $ret_pid");
      $srv->{'pid'}= 0;
    }
    elsif ( $ret_pid == 0 )
    {
      mtr_verbose("Process $pid is still alive");
      if ( $wait_counter-- > 0 )
      {
        # Give the process more time to exit, then look at
        # the same server again
        select(undef, undef, undef, (0.1));
        redo;
      }
    }
    else
    {
      mtr_warning("caught exit of unknown child $ret_pid");
    }
  }

  # ----------------------------------------------------------------------
  # The processes that haven't yet exited need to
  # be killed hard, put them in the "kill_pids" hash
  # ----------------------------------------------------------------------
  my %kill_pids;
  foreach my $srv ( @$spec )
  {
    my $pid= $srv->{'pid'};

    if ( ! $pid )
    {
      # Server is dead, remove the pidfile if it exists
      #
      # Race, could have been removed between the test with -f
      # and the unlink(), so better check again with -f
      my $pidfile= $srv->{'pidfile'};
      if ( -f $pidfile and ! unlink($pidfile) and -f $pidfile )
      {
        mtr_error("can't remove $pidfile");
      }
      next;
    }

    # Server is still alive, put it in the list to be hard killed.
    # On win32, kill the real process if it's known.
    $pid= $srv->{'real_pid'} if $::glob_win32_perl and $srv->{'real_pid'};
    $kill_pids{$pid}= 1;

    # Write a message to the process's error log (if it has one)
    # that it's being killed hard.
    mtr_tofile($srv->{'errfile'}, "Note: Forcing kill of process $pid\n")
      if defined $srv->{'errfile'};

    mtr_warning("Forcing kill of process $pid");
  }

  # All processes have exited gracefully
  return unless keys %kill_pids;

  mtr_kill_processes(\%kill_pids);

  # ----------------------------------------------------------------------
  # All processes are killed, cleanup leftover files
  # ----------------------------------------------------------------------
  my $errors= 0;
  foreach my $srv ( @$spec )
  {
    next unless $srv->{'pid'};

    # Server has been hard killed, clean up its resources
    foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'})
    {
      # Know it is dead so should be no race, careful anyway
      if ( defined $file and -f $file and ! unlink($file) and -f $file )
      {
        $errors++;
        mtr_warning("couldn't delete $file");
      }
    }

    if ( $::glob_win32_perl and $srv->{'real_pid'} )
    {
      # Wait for the pseudo pid - if the real_pid was known
      # the pseudo pid has not been waited for yet, wait blocking
      # since it's "such a simple program"
      mtr_verbose("Wait for pseudo process $srv->{'pid'}");
      my $ret_pid= waitpid($srv->{'pid'}, 0);
      mtr_verbose("Pseudo process $ret_pid died");
    }

    $srv->{'pid'}= 0;
  }

  if ( $errors )
  {
    # There were errors killing processes,
    # do one last attempt to ping the servers
    # and if they can't be pinged, assume they are dead
    #
    # NOTE(review): every 'pid' in @$spec is 0 at this point and
    # mtr_ping_with_timeout() only pings entries with a true 'pid',
    # so this call looks like it always reports success -- confirm.
    if ( ! mtr_ping_with_timeout($spec) )
    {
      mtr_error("we could not kill or clean up all processes");
    }
    else
    {
      mtr_verbose("All ports were free, continuing");
    }
  }
}
694
695
696
# Wait blocking for every process whose pid is a key of the hash
# that $admin_pids refers to. Returns at once if the hash is empty.
sub mtr_wait_blocking($) {
  my ($admin_pids)= @_;

  # Nothing to wait for if no processes are defined
  return unless %$admin_pids;

  mtr_verbose("mtr_wait_blocking");

  # As mysqladmin is such a simple program, we trust it to terminate
  # itself. I.e. we wait blocking, one process after the other, and
  # wait for them all before we go on.
  waitpid($_, 0) foreach keys %{$admin_pids};
}
715
716
# Start "mysqladmin <command>" for a specific mysqld.
#
#   $srv               hash reference for the server, 'path_sock' and
#                      'port' are used to connect to it
#   $command           the mysqladmin command to run
#   $adm_shutdown_tmo  value for the --shutdown_timeout option
#
# Returns what mtr_spawn() returns for the started mysqladmin.
sub mtr_mysqladmin_start($$$) {
  my ($srv, $command, $adm_shutdown_tmo)= @_;

  my $args;
  mtr_init_args(\$args);

  mtr_add_arg($args, "--no-defaults");
  mtr_add_arg($args, "--user=%s", $::opt_user);
  mtr_add_arg($args, "--password=");
  mtr_add_arg($args, "--silent");

  mtr_add_arg($args, "--socket=%s", $srv->{'path_sock'})
    if -e $srv->{'path_sock'};

  if ( $srv->{'port'} )
  {
    mtr_add_arg($args, "--port=%s", $srv->{'port'});

    # Needed if no --socket
    mtr_add_arg($args, "--protocol=tcp") unless -e $srv->{'path_sock'};
  }

  mtr_add_arg($args, "--connect_timeout=5");

  # Shutdown time must be high as slave may be in reconnect
  mtr_add_arg($args, "--shutdown_timeout=$adm_shutdown_tmo");
  mtr_add_arg($args, "$command");

  my %spawn_opts= ( append_log_file => 1 );
  my $pid= mtr_spawn($::exe_mysqladmin, $args,
                     "", "", "", "",
                     \%spawn_opts);
  mtr_verbose("mtr_mysqladmin_start, pid: $pid");
  return $pid;
}
753
754
# Start "ndb_mgm -e <command>" for a specific cluster. With the
# command "shutdown" it will shut down all data nodes and leave the
# ndb_mgmd running.
#
#   $cluster  hash reference for the cluster, 'connect_string' is used
#   $command  the ndb_mgm command to execute
#
# STDOUT and STDERR of ndb_mgm are sent to /dev/null.
# Returns what mtr_spawn() returns for the started ndb_mgm.
sub mtr_ndbmgm_start($$) {
  my ($cluster, $command)= @_;

  my $args;
  mtr_init_args(\$args);

  foreach my $option ("--no-defaults", "--core", "--try-reconnect=1")
  {
    mtr_add_arg($args, $option);
  }
  mtr_add_arg($args, "--ndb_connectstring=%s", $cluster->{'connect_string'});
  mtr_add_arg($args, "-e");
  mtr_add_arg($args, "$command");

  my $devnull= "/dev/null";
  my $pid= mtr_spawn($::exe_ndb_mgm, $args,
                     "", $devnull, $devnull, "",
                     {});
  mtr_verbose("mtr_ndbmgm_start, pid: $pid");
  return $pid;
}
778
779
780
# Ping all servers in the list, return when none of them answers
# or when the timeout has passed.
#
# $spec is a reference to a list of hashes, only entries with a true
# 'pid' and a defined 'port' are pinged. At most 200 rounds are made
# and each round that finds a server listening sleeps 0.1 seconds.
#
# Returns 1 if no server answered in the last round, else 0.
sub mtr_ping_with_timeout($) {
  my ($spec)= @_;

  my $timeout= 200;                     # 20 seconds max
  my $res= 1;                           # If we just fall through, we are
                                        # done in the sense that the servers
                                        # don't listen to their ports any
                                        # longer

  mtr_debug("Waiting for mysqld servers to stop...");

 ROUND:
  while ( $timeout-- )
  {
    foreach my $srv ( @$spec )
    {
      $res= 1;                          # We are optimistic

      next unless $srv->{'pid'} and defined $srv->{'port'};

      # A failed ping means the process is not using the port
      next unless mtr_ping_port($srv->{'port'});

      mtr_verbose("waiting for process $srv->{'pid'} to stop ".
                  "using port $srv->{'port'}");

      # Millisecond sleep emulated with select
      select(undef, undef, undef, (0.1));
      $res= 0;
      next ROUND;                       # Start over with the first server
    }
    last;                               # If we got here, we are done
  }

  if ( $res )
  {
    mtr_debug("mtr_ping_with_timeout(): All mysqld instances are down.");
  }
  else
  {
    mtr_report("mtr_ping_with_timeout(): At least one server is alive.");
  }

  return $res;
}
829
830
831
#
# Loop through our lists of processes (masters, slaves, clusters and
# the ndbds of each cluster) and look for an entry with the provided
# pid. Set the 'pid' of the first entry found to 0, or print a warning
# if there is no such entry.
#
sub mark_process_dead($)
{
  my ($ret_pid)= @_;

  foreach my $mysqld (@{$::master}, @{$::slave})
  {
    next unless $mysqld->{'pid'} eq $ret_pid;

    mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
    $mysqld->{'pid'}= 0;
    return;
  }

  foreach my $cluster (@{$::clusters})
  {
    if ( $cluster->{'pid'} eq $ret_pid )
    {
      mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid");
      $cluster->{'pid'}= 0;
      return;
    }

    # Not the management server, try the data nodes of this cluster
    foreach my $ndbd (@{$cluster->{'ndbds'}})
    {
      next unless $ndbd->{'pid'} eq $ret_pid;

      mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid");
      $ndbd->{'pid'}= 0;
      return;
    }
  }

  mtr_warning("mark_process_dead couldn't find an entry for pid: $ret_pid");
}
872
873
#
# Loop through our lists of processes (masters, slaves, clusters and
# the ndbds of each cluster) and look for an entry with the provided
# pid. The 'pid' of the entry found is set to 0, and if the ".expect"
# file of that process exists in "$::opt_vardir/tmp" the crash was
# expected: the process is started again and the file is removed.
#
# The pid of the instance manager spawner is accepted silently, any
# other unknown pid gives a warning.
#
sub check_expected_crash_and_restart($)
{
  my ($ret_pid)= @_;

  my $tmpdir= "$::opt_vardir/tmp";

  foreach my $mysqld (@{$::master}, @{$::slave})
  {
    next unless $mysqld->{'pid'} eq $ret_pid;

    mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
    $mysqld->{'pid'}= 0;

    # Check if crash expected and restart if it was
    my $expect_file= "$tmpdir/$mysqld->{'type'}$mysqld->{'idx'}.expect";
    if ( -f $expect_file )
    {
      mtr_verbose("Crash was expected, file $expect_file exists");
      mysqld_start($mysqld, $mysqld->{'start_opts'},
                   $mysqld->{'start_slave_master_info'});
      unlink($expect_file);
    }

    return;
  }

  foreach my $cluster (@{$::clusters})
  {
    if ( $cluster->{'pid'} eq $ret_pid )
    {
      mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid");
      $cluster->{'pid'}= 0;

      # Check if crash expected and restart if it was
      my $expect_file= "$tmpdir/ndb_mgmd_$cluster->{'type'}.expect";
      if ( -f $expect_file )
      {
        mtr_verbose("Crash was expected, file $expect_file exists");
        ndbmgmd_start($cluster);
        unlink($expect_file);
      }
      return;
    }

    # Not the management server, try the data nodes of this cluster
    foreach my $ndbd (@{$cluster->{'ndbds'}})
    {
      next unless $ndbd->{'pid'} eq $ret_pid;

      mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid");
      $ndbd->{'pid'}= 0;

      # Check if crash expected and restart if it was
      my $expect_file=
        "$tmpdir/ndbd_$cluster->{'type'}$ndbd->{'idx'}.expect";
      if ( -f $expect_file )
      {
        mtr_verbose("Crash was expected, file $expect_file exists");
        ndbd_start($cluster, $ndbd->{'idx'},
                   $ndbd->{'start_extra_args'});
        unlink($expect_file);
      }
      return;
    }
  }

  # The instance manager spawner is known but needs no handling here
  return if $::instance_manager->{'spawner_pid'} eq $ret_pid;

  mtr_warning("check_expected_crash_and_restart couldn't find an entry for pid: $ret_pid");
}
953
954
##############################################################################
955
#
956
#  The operating system will keep information about dead children, 
957
#  we read this information here, and if we have records the process
958
#  is alive, we mark it as dead.
959
#
960
##############################################################################
961
962
# Collect, without blocking, every child process that has terminated
# and mark it as dead with mark_process_dead().
#
# Returns 1 if at least one terminated child was collected, else 0.
sub mtr_record_dead_children () {

  my $process_died= 0;

  while ( 1 )
  {
    # Wait without blocking to see if any process has died,
    # -1 or 0 means there are no more processes to wait for
    my $ret_pid= waitpid(-1,&WNOHANG);
    last if $ret_pid == 0 or $ret_pid == -1;

    mtr_warning("mtr_record_dead_children: $ret_pid");
    mark_process_dead($ret_pid);
    $process_died= 1;
  }

  return $process_died;
}
977
978
# Start ignoring SIGCHLD and collect the children that have already
# terminated, each of them is marked dead with mark_process_dead().
sub start_reap_all {
  # This causes terminating processes to not become zombies, avoiding
  # the need for (or possibility of) explicit waitpid().
  $SIG{CHLD}= 'IGNORE';

  # On some platforms (Linux, QNX, OSX, ...) there is a potential race
  # here. If a process terminated before setting $SIG{CHLD} (but after
  # any attempt to waitpid() it), it will still be a zombie. So we
  # have to handle any such process here.
  while ( 1 )
  {
    my $pid= waitpid(-1, &WNOHANG);
    last if $pid == 0 or $pid == -1;

    mtr_warning("start_reap_all pid: $pid");
    mark_process_dead($pid);
  }
}
994
995
# Undo start_reap_all(): give SIGCHLD its default action back.
sub stop_reap_all {
  return $SIG{'CHLD'}= 'DEFAULT';
}
998
999
1000
# Find out if something listens to TCP port $port on "localhost" by
# trying to connect to it.
#
# Returns 1 if the connect succeeded (port is used), else 0.
sub mtr_ping_port ($) {
  my $port= shift;

  mtr_verbose("mtr_ping_port: $port");

  my $remote= "localhost";
  my $iaddr=  inet_aton($remote);
  if ( ! $iaddr )
  {
    mtr_error("can't find IP number for $remote");
  }
  my $paddr=  sockaddr_in($port, $iaddr);
  my $proto=  getprotobyname('tcp');
  if ( ! socket(SOCK, PF_INET, SOCK_STREAM, $proto) )
  {
    mtr_error("can't create socket: $!");
  }

  mtr_debug("Pinging server (port: $port)...");

  my $port_used= connect(SOCK, $paddr) ? 1 : 0;

  # Close the socket whether the connect worked or not, so that
  # no descriptor is kept open between the calls
  close(SOCK);                          # FIXME check error?

  mtr_verbose($port_used ? "USED" : "FREE");
  return $port_used;
}
1032
1033
##############################################################################
1034
#
1035
#  Wait for a file to be created
1036
#
1037
##############################################################################
1038
1039
# FIXME check that the pidfile contains the expected pid!
1040
1041
# Wait until the file $pidfile is readable.
#
#   $pidfile  the file to wait for
#   $timeout  max number of seconds to wait
#   $pid      pid of the process expected to create the file, or 0
#
# The file is looked for every 100 milliseconds. If $pid is not 0 the
# wait is given up as soon as that process is found to have terminated.
#
# Returns 1 if the file is readable, 0 if the process died or the
# timeout passed.
sub sleep_until_file_created ($$$) {
  my ($pidfile, $timeout, $pid)= @_;

  my $sleeptime= 100; # Milliseconds
  my $loops= ($timeout * 1000) / $sleeptime;

  my $loop= 0;
  while ( ++$loop <= $loops )
  {
    return 1 if -r $pidfile;

    # Check if it died after the fork() was successful
    if ( $pid != 0 && waitpid($pid,&WNOHANG) == $pid )
    {
      mtr_warning("Process $pid died");
      return 0;
    }

    mtr_debug("Sleep $sleeptime milliseconds waiting for $pidfile");

    # Print extra message every 60 seconds
    my $seconds= ($loop * $sleeptime) / 1000;
    if ( $seconds > 1 and int($seconds * 10) % 600 == 0 )
    {
      my $left= $timeout - $seconds;
      mtr_warning("Waited $seconds seconds for $pidfile to be created, " .
                  "still waiting for $left seconds...");
    }

    # Millisecond sleep emulated with select
    select(undef, undef, undef, ($sleeptime/1000));
  }

  return 0;
}
1079
1080
1081
# Send SIGKILL to every process whose pid is a key of the hash that
# $pids refers to, and wait blocking for each process that could be
# signaled. Pids that are zero or negative are skipped with a warning.
sub mtr_kill_processes ($) {
  my ($pids)= @_;

  mtr_verbose("mtr_kill_processes (" . join(" ", keys %{$pids}) . ")");

  foreach my $pid (keys %{$pids})
  {
    if ( $pid <= 0 )
    {
      mtr_warning("Trying to kill illegal pid: $pid");
      next;
    }

    unless ( kill(9, $pid) )
    {
      # No such process existed, assume it's killed
      mtr_verbose("killed $pid(no such process)");
      next;
    }

    # The signal was sent, collect the process
    my $ret_pid= waitpid($pid,0);
    if ( $ret_pid == $pid )
    {
      mtr_verbose("killed $pid(got the pid)");
    }
    elsif ( $ret_pid == -1 )
    {
      mtr_verbose("killed $pid(got -1)");
    }
  }
  mtr_verbose("done killing processes");
}
1116
1117
1118
##############################################################################
1119
#
1120
#  When we exit, we kill off all children
1121
#
1122
##############################################################################
1123
1124
# Stop all timers, send SIGHUP to our own process group (only when we
# are the leader of it and not on win32 perl) and exit with $code.
sub mtr_exit ($) {
  my $code= shift;
  mtr_timer_stop_all($::glob_timers);

  # We are part of the process group, don't let the signal end us
  local $SIG{HUP} = 'IGNORE';

  # ToDo: Signalling -$$ will only work if we are the process group
  # leader (in fact on QNX it will signal our session group leader,
  # which might be Do-compile or Pushbuild, causing tests to be
  # aborted). So we only do it if we are the group leader. We might
  # set ourselves as the group leader at startup (with
  # POSIX::setpgrp(0,0)), but then care must be needed to always do
  # proper child process cleanup.
  #
  # The builtin kill() takes the signal first. POSIX::kill() takes
  # (pid, sig), so calling it with the signal first would use -$$
  # as the signal and never reach our process group.
  kill(SIGHUP, -$$) if !$::glob_win32_perl and $$ == getpgrp();

  exit($code);
}
1139
1140
###########################################################################
1141
1142
1;