~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000-2003 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
17
/**
18
  @file
19
20
  @brief
21
  logging of commands
22
23
  @todo
24
    Abort logging when we get an error in reading or writing log files
25
*/
26
27
#include "mysql_priv.h"
28
#include "sql_repl.h"
29
#include "rpl_filter.h"
30
#include "rpl_rli.h"
31
32
#include <my_dir.h>
33
#include <stdarg.h>
34
#include <m_ctype.h>				// For test_if_number
35
36
#include <mysql/plugin.h>
37
38
/* max size of the log message */
39
#define MAX_LOG_BUFFER_SIZE 1024
40
#define MAX_USER_HOST_SIZE 512
41
#define MAX_TIME_SIZE 32
42
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
43
44
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
45
46
LOGGER logger;
47
48
MYSQL_BIN_LOG mysql_bin_log;
49
ulong sync_binlog_counter= 0;
50
51
static bool test_if_number(const char *str,
52
			   long *res, bool allow_wildcards);
53
static int binlog_init(void *p);
54
static int binlog_close_connection(handlerton *hton, THD *thd);
55
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
56
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
57
static int binlog_commit(handlerton *hton, THD *thd, bool all);
58
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
59
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
60
61
/**
62
  Silence all errors and warnings reported when performing a write
63
  to a log table.
64
  Errors and warnings are not reported to the client or SQL exception
65
  handlers, so that the presence of logging does not interfere and affect
66
  the logic of an application.
67
*/
68
class Silence_log_table_errors : public Internal_error_handler
69
{
70
  char m_message[MYSQL_ERRMSG_SIZE];
71
public:
72
  Silence_log_table_errors()
73
  {
74
    m_message[0]= '\0';
75
  }
76
77
  virtual ~Silence_log_table_errors() {}
78
79
  virtual bool handle_error(uint sql_errno, const char *message,
80
                            MYSQL_ERROR::enum_warning_level level,
81
                            THD *thd);
82
  const char *message() const { return m_message; }
83
};
84
85
bool
86
Silence_log_table_errors::handle_error(uint /* sql_errno */,
87
                                       const char *message_arg,
88
                                       MYSQL_ERROR::enum_warning_level /* level */,
89
                                       THD * /* thd */)
90
{
91
  strmake(m_message, message_arg, sizeof(m_message)-1);
92
  return TRUE;
93
}
94
95
96
/* Message printers, listed as: information, warning, error. */
sql_print_message_func sql_print_message_handlers[3] =
{
  sql_print_information, sql_print_warning, sql_print_error
};
102
103
104
char *make_default_log_name(char *buff,const char* log_ext)
105
{
106
  strmake(buff, pidfile_name, FN_REFLEN-5);
107
  return fn_format(buff, buff, mysql_data_home, log_ext,
108
                   MYF(MY_UNPACK_FILENAME|MY_REPLACE_EXT));
109
}
110
111
/*
112
  Helper class to hold a mutex for the duration of the
113
  block.
114
115
  Eliminates the need for explicit unlocking of mutexes on, e.g.,
116
  error returns.  On passing a null pointer, the sentry will not do
117
  anything.
118
 */
119
class Mutex_sentry
{
  /* Mutex owned for the sentry's lifetime; 0 makes the sentry a no-op. */
  pthread_mutex_t *m_mutex;

  /* Copying is forbidden: declared here, never defined. */
  Mutex_sentry(Mutex_sentry const&);
  void operator=(Mutex_sentry const&);

public:
  /* Lock 'mutex' unless it is a null pointer. */
  Mutex_sentry(pthread_mutex_t *mutex)
    : m_mutex(mutex)
  {
    if (m_mutex != 0)
      pthread_mutex_lock(m_mutex);
  }

  /* Unlock the mutex taken by the constructor, if there was one. */
  ~Mutex_sentry()
  {
    if (m_mutex != 0)
      pthread_mutex_unlock(m_mutex);
#ifndef DBUG_OFF
    /* Debug builds clear the pointer after unlocking. */
    m_mutex= 0;
#endif
  }
};
145
146
/*
147
  Helper class to store binary log transaction data.
148
*/
149
class binlog_trx_data {
150
public:
151
  binlog_trx_data()
152
    : at_least_one_stmt(0), m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF)
153
  {
154
    trans_log.end_of_file= max_binlog_cache_size;
155
  }
156
157
  ~binlog_trx_data()
158
  {
159
    DBUG_ASSERT(pending() == NULL);
160
    close_cached_file(&trans_log);
161
  }
162
163
  my_off_t position() const {
164
    return my_b_tell(&trans_log);
165
  }
166
167
  bool empty() const
168
  {
169
    return pending() == NULL && my_b_tell(&trans_log) == 0;
170
  }
171
172
  /*
173
    Truncate the transaction cache to a certain position. This
174
    includes deleting the pending event.
175
   */
176
  void truncate(my_off_t pos)
177
  {
178
    DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
179
    DBUG_PRINT("info", ("before_stmt_pos=%lu", (ulong) pos));
180
    delete pending();
181
    set_pending(0);
182
    reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0);
183
    if (pos < before_stmt_pos)
184
      before_stmt_pos= MY_OFF_T_UNDEF;
185
186
    /*
187
      The only valid positions that can be truncated to are at the
188
      beginning of a statement. We are relying on this fact to be able
189
      to set the at_least_one_stmt flag correctly. In other word, if
190
      we are truncating to the beginning of the transaction cache,
191
      there will be no statements in the cache, otherwhise, we will
192
      have at least one statement in the transaction cache.
193
     */
194
    at_least_one_stmt= (pos > 0);
195
  }
196
197
  /*
198
    Reset the entire contents of the transaction cache, emptying it
199
    completely.
200
   */
201
  void reset() {
202
    if (!empty())
203
      truncate(0);
204
    before_stmt_pos= MY_OFF_T_UNDEF;
205
    trans_log.end_of_file= max_binlog_cache_size;
206
  }
207
208
  Rows_log_event *pending() const
209
  {
210
    return m_pending;
211
  }
212
213
  void set_pending(Rows_log_event *const pending)
214
  {
215
    m_pending= pending;
216
  }
217
218
  IO_CACHE trans_log;                         // The transaction cache
219
220
  /**
221
    Boolean that is true if there is at least one statement in the
222
    transaction cache.
223
  */
224
  bool at_least_one_stmt;
225
226
private:
227
  /*
228
    Pending binrows event. This event is the event where the rows are
229
    currently written.
230
   */
231
  Rows_log_event *m_pending;
232
233
public:
234
  /*
235
    Binlog position before the start of the current statement.
236
  */
237
  my_off_t before_stmt_pos;
238
};
239
240
handlerton *binlog_hton;
241
242
243
/* Check if a given table is opened log table */
77.1.45 by Monty Taylor
Warning fixes.
244
int check_if_log_table(uint db_len __attribute__((__unused__)),
245
                       const char *db __attribute__((__unused__)),
246
                       uint table_name_len __attribute__((__unused__)),
247
                       const char *table_name __attribute__((__unused__)),
248
                       uint check_if_opened __attribute__((__unused__)))
1 by brian
clean slate
249
{
250
  return 0;
251
}
252
253
/* log event handlers */
254
255
bool Log_to_file_event_handler::
256
  log_error(enum loglevel level, const char *format,
257
            va_list args)
258
{
259
  return vprint_msg_to_log(level, format, args);
260
}
261
262
void Log_to_file_event_handler::init_pthread_objects()
263
{
264
  mysql_log.init_pthread_objects();
265
  mysql_slow_log.init_pthread_objects();
266
}
267
268
269
/** Wrapper around MYSQL_LOG::write() for slow log. */
270
271
bool Log_to_file_event_handler::
272
  log_slow(THD *thd, time_t current_time, time_t query_start_arg,
273
           const char *user_host, uint user_host_len,
274
           ulonglong query_utime, ulonglong lock_utime, bool is_command,
275
           const char *sql_text, uint sql_text_len)
276
{
277
  return mysql_slow_log.write(thd, current_time, query_start_arg,
278
                              user_host, user_host_len,
279
                              query_utime, lock_utime, is_command,
280
                              sql_text, sql_text_len);
281
}
282
283
284
/**
285
   Wrapper around MYSQL_LOG::write() for general log. We need it since we
286
   want all log event handlers to have the same signature.
287
*/
288
289
bool Log_to_file_event_handler::
77.1.45 by Monty Taylor
Warning fixes.
290
  log_general(THD *thd __attribute__((__unused__)),
291
              time_t event_time, const char *user_host,
1 by brian
clean slate
292
              uint user_host_len, int thread_id,
293
              const char *command_type, uint command_type_len,
294
              const char *sql_text, uint sql_text_len,
77.1.45 by Monty Taylor
Warning fixes.
295
              CHARSET_INFO *client_cs __attribute__((__unused__)))
1 by brian
clean slate
296
{
297
  return mysql_log.write(event_time, user_host, user_host_len,
298
                         thread_id, command_type, command_type_len,
299
                         sql_text, sql_text_len);
300
}
301
302
303
bool Log_to_file_event_handler::init()
304
{
305
  if (!is_initialized)
306
  {
307
    if (opt_slow_log)
308
      mysql_slow_log.open_slow_log(sys_var_slow_log_path.value);
309
310
    if (opt_log)
311
      mysql_log.open_query_log(sys_var_general_log_path.value);
312
313
    is_initialized= TRUE;
314
  }
315
316
  return FALSE;
317
}
318
319
320
void Log_to_file_event_handler::cleanup()
321
{
322
  mysql_log.cleanup();
323
  mysql_slow_log.cleanup();
324
}
325
326
void Log_to_file_event_handler::flush()
327
{
328
  /* reopen log files */
329
  if (opt_log)
330
    mysql_log.reopen_file();
331
  if (opt_slow_log)
332
    mysql_slow_log.reopen_file();
333
}
334
335
/*
336
  Log error with all enabled log event handlers
337
338
  SYNOPSIS
339
    error_log_print()
340
341
    level             The level of the error significance: NOTE,
342
                      WARNING or ERROR.
343
    format            format string for the error message
344
    args              list of arguments for the format string
345
346
  RETURN
347
    FALSE - OK
348
    TRUE - error occurred
349
*/
350
351
bool LOGGER::error_log_print(enum loglevel level, const char *format,
352
                             va_list args)
353
{
354
  bool error= FALSE;
355
  Log_event_handler **current_handler;
356
357
  /* currently we don't need locking here as there is no error_log table */
358
  for (current_handler= error_log_handler_list ; *current_handler ;)
359
    error= (*current_handler++)->log_error(level, format, args) || error;
360
361
  return error;
362
}
363
364
365
void LOGGER::cleanup_base()
366
{
367
  DBUG_ASSERT(inited == 1);
368
  rwlock_destroy(&LOCK_logger);
369
  if (file_log_handler)
370
    file_log_handler->cleanup();
371
}
372
373
374
void LOGGER::cleanup_end()
375
{
376
  DBUG_ASSERT(inited == 1);
377
  if (file_log_handler)
378
    delete file_log_handler;
379
}
380
381
382
/**
383
  Perform basic log initialization: create file-based log handler and
384
  init error log.
385
*/
386
void LOGGER::init_base()
387
{
388
  DBUG_ASSERT(inited == 0);
389
  inited= 1;
390
391
  /*
392
    Here we create file log handler. We don't do it for the table log handler
393
    here as it cannot be created so early. The reason is THD initialization,
394
    which depends on the system variables (parsed later).
395
  */
396
  if (!file_log_handler)
397
    file_log_handler= new Log_to_file_event_handler;
398
399
  /* by default we use traditional error log */
400
  init_error_log(LOG_FILE);
401
402
  file_log_handler->init_pthread_objects();
403
  my_rwlock_init(&LOCK_logger, NULL);
404
}
405
406
77.1.45 by Monty Taylor
Warning fixes.
407
/*
  Reopen the log files while holding the global 'logger' exclusively, so
  that nobody can use the logging routines in the meantime.

  'thd' is unused.  Always returns FALSE.
*/
bool LOGGER::flush_logs(THD *thd __attribute__((__unused__)))
{
  logger.lock_exclusive();

  /* reopen log files */
  file_log_handler->flush();

  logger.unlock();
  return FALSE;
}
424
425
426
/*
427
  Log slow query with all enabled log event handlers
428
429
  SYNOPSIS
430
    slow_log_print()
431
432
    thd                 THD of the query being logged
433
    query               The query being logged
434
    query_length        The length of the query string
435
    current_utime       Current time in microseconds (from undefined start)
436
437
  RETURN
438
    FALSE   OK
439
    TRUE    error occurred
440
*/
441
442
bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
443
                            ulonglong current_utime)
444
445
{
446
  bool error= FALSE;
447
  Log_event_handler **current_handler;
448
  bool is_command= FALSE;
449
  char user_host_buff[MAX_USER_HOST_SIZE];
450
  Security_context *sctx= thd->security_ctx;
451
  uint user_host_len= 0;
452
  ulonglong query_utime, lock_utime;
453
454
  /*
455
    Print the message to the buffer if we have slow log enabled
456
  */
457
458
  if (*slow_log_handler_list)
459
  {
460
    time_t current_time;
461
462
    /* do not log slow queries from replication threads */
463
    if (thd->slave_thread && !opt_log_slow_slave_statements)
464
      return 0;
465
466
    lock_shared();
467
    if (!opt_slow_log)
468
    {
469
      unlock();
470
      return 0;
471
    }
472
473
    /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
474
    user_host_len= (strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
475
                             sctx->priv_user ? sctx->priv_user : "", "[",
476
                             sctx->user ? sctx->user : "", "] @ ",
477
                             sctx->host ? sctx->host : "", " [",
478
                             sctx->ip ? sctx->ip : "", "]", NullS) -
479
                    user_host_buff);
480
481
    current_time= my_time_possible_from_micro(current_utime);
482
    if (thd->start_utime)
483
    {
484
      query_utime= (current_utime - thd->start_utime);
485
      lock_utime=  (thd->utime_after_lock - thd->start_utime);
486
    }
487
    else
488
    {
489
      query_utime= lock_utime= 0;
490
    }
491
492
    if (!query)
493
    {
494
      is_command= TRUE;
495
      query= command_name[thd->command].str;
496
      query_length= command_name[thd->command].length;
497
    }
498
499
    for (current_handler= slow_log_handler_list; *current_handler ;)
500
      error= (*current_handler++)->log_slow(thd, current_time, thd->start_time,
501
                                            user_host_buff, user_host_len,
502
                                            query_utime, lock_utime, is_command,
503
                                            query, query_length) || error;
504
505
    unlock();
506
  }
507
  return error;
508
}
509
510
bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
511
                               const char *query, uint query_length)
512
{
513
  bool error= FALSE;
514
  Log_event_handler **current_handler= general_log_handler_list;
515
  char user_host_buff[MAX_USER_HOST_SIZE];
516
  Security_context *sctx= thd->security_ctx;
517
  ulong id;
518
  uint user_host_len= 0;
519
  time_t current_time;
520
521
  if (thd)
522
    id= thd->thread_id;                 /* Normal thread */
523
  else
524
    id= 0;                              /* Log from connect handler */
525
526
  lock_shared();
527
  if (!opt_log)
528
  {
529
    unlock();
530
    return 0;
531
  }
532
  user_host_len= strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
533
                          sctx->priv_user ? sctx->priv_user : "", "[",
534
                          sctx->user ? sctx->user : "", "] @ ",
535
                          sctx->host ? sctx->host : "", " [",
536
                          sctx->ip ? sctx->ip : "", "]", NullS) -
537
                                                          user_host_buff;
538
539
  current_time= my_time(0);
540
541
  while (*current_handler)
542
    error|= (*current_handler++)->
543
      log_general(thd, current_time, user_host_buff,
544
                  user_host_len, id,
545
                  command_name[(uint) command].str,
546
                  command_name[(uint) command].length,
547
                  query, query_length,
548
                  thd->variables.character_set_client) || error;
549
  unlock();
550
551
  return error;
552
}
553
554
bool LOGGER::general_log_print(THD *thd, enum enum_server_command command,
555
                               const char *format, va_list args)
556
{
557
  uint message_buff_len= 0;
558
  char message_buff[MAX_LOG_BUFFER_SIZE];
559
560
  /* prepare message */
561
  if (format)
77.1.18 by Monty Taylor
Removed my_vsnprintf and my_snprintf.
562
    message_buff_len= vsnprintf(message_buff, sizeof(message_buff),
1 by brian
clean slate
563
                                   format, args);
564
  else
565
    message_buff[0]= '\0';
566
567
  return general_log_write(thd, command, message_buff, message_buff_len);
568
}
569
570
/*
  Choose the handlers for the error log.

  With the LOG_NONE bit set the list is emptied. Exactly LOG_FILE selects
  the file handler. LOG_TABLE and LOG_TABLE|LOG_FILE are disabled for now
  and only trip a debug assertion. Any other value leaves the list
  untouched.
*/
void LOGGER::init_error_log(uint error_log_printer)
{
  if (error_log_printer & LOG_NONE)
  {
    error_log_handler_list[0]= 0;
    return;
  }

  if (error_log_printer == LOG_FILE)
  {
    error_log_handler_list[0]= file_log_handler;
    error_log_handler_list[1]= 0;
  }
  else if (error_log_printer == LOG_TABLE ||
           error_log_printer == (LOG_TABLE|LOG_FILE))
  {
    /* these two are disabled for now */
    DBUG_ASSERT(0);
  }
}
592
593
/*
  Choose the handlers for the slow log: none when the LOG_NONE bit is
  set, otherwise just the file handler.
*/
void LOGGER::init_slow_log(uint slow_log_printer)
{
  const bool disabled= (slow_log_printer & LOG_NONE) != 0;

  if (disabled)
  {
    slow_log_handler_list[0]= 0;
  }
  else
  {
    slow_log_handler_list[0]= file_log_handler;
    slow_log_handler_list[1]= 0;
  }
}
604
605
/*
  Choose the handlers for the general log: none when the LOG_NONE bit is
  set, otherwise just the file handler.
*/
void LOGGER::init_general_log(uint general_log_printer)
{
  const bool disabled= (general_log_printer & LOG_NONE) != 0;

  if (disabled)
  {
    general_log_handler_list[0]= 0;
  }
  else
  {
    general_log_handler_list[0]= file_log_handler;
    general_log_handler_list[1]= 0;
  }
}
616
617
77.1.45 by Monty Taylor
Warning fixes.
618
bool LOGGER::activate_log_handler(THD* thd __attribute__((__unused__)),
619
                                  uint log_type)
1 by brian
clean slate
620
{
621
  MYSQL_QUERY_LOG *file_log;
622
  bool res= FALSE;
623
  lock_exclusive();
624
  switch (log_type) {
625
  case QUERY_LOG_SLOW:
626
    if (!opt_slow_log)
627
    {
628
      file_log= file_log_handler->get_mysql_slow_log();
629
630
      file_log->open_slow_log(sys_var_slow_log_path.value);
631
      init_slow_log(log_output_options);
632
      opt_slow_log= TRUE;
633
    }
634
    break;
635
  case QUERY_LOG_GENERAL:
636
    if (!opt_log)
637
    {
638
      file_log= file_log_handler->get_mysql_log();
639
640
      file_log->open_query_log(sys_var_general_log_path.value);
641
      init_general_log(log_output_options);
642
      opt_log= TRUE;
643
    }
644
    break;
645
  default:
646
    DBUG_ASSERT(0);
647
  }
648
  unlock();
649
  return res;
650
}
651
652
77.1.45 by Monty Taylor
Warning fixes.
653
void LOGGER::deactivate_log_handler(THD *thd __attribute__((__unused__)),
654
                                    uint log_type)
1 by brian
clean slate
655
{
656
  my_bool *tmp_opt= 0;
657
  MYSQL_LOG *file_log;
658
659
  switch (log_type) {
660
  case QUERY_LOG_SLOW:
661
    tmp_opt= &opt_slow_log;
662
    file_log= file_log_handler->get_mysql_slow_log();
663
    break;
664
  case QUERY_LOG_GENERAL:
665
    tmp_opt= &opt_log;
666
    file_log= file_log_handler->get_mysql_log();
667
    break;
668
  default:
669
    assert(0);                                  // Impossible
670
  }
671
672
  if (!(*tmp_opt))
673
    return;
674
675
  lock_exclusive();
676
  file_log->close(0);
677
  *tmp_opt= FALSE;
678
  unlock();
679
}
680
681
/*
  Rebuild the error, slow and general log handler lists from the given
  printer masks, all under one exclusive logger lock.

  Always returns 0.
*/
int LOGGER::set_handlers(uint error_log_printer,
                         uint slow_log_printer,
                         uint general_log_printer)
{
  /* error log table is not supported yet */
  DBUG_ASSERT(error_log_printer < LOG_TABLE);

  lock_exclusive();
  init_error_log(error_log_printer);
  init_slow_log(slow_log_printer);
  init_general_log(general_log_printer);
  unlock();

  return 0;
}
698
699
700
 /*
701
  Save position of binary log transaction cache.
702
703
  SYNOPSIS
704
    binlog_trans_log_savepos()
705
706
    thd      The thread to take the binlog data from
707
    pos      Pointer to variable where the position will be stored
708
709
  DESCRIPTION
710
711
    Save the current position in the binary log transaction cache into
712
    the variable pointed to by 'pos'
713
 */
714
715
static void
716
binlog_trans_log_savepos(THD *thd, my_off_t *pos)
717
{
718
  DBUG_ENTER("binlog_trans_log_savepos");
719
  DBUG_ASSERT(pos != NULL);
720
  if (thd_get_ha_data(thd, binlog_hton) == NULL)
721
    thd->binlog_setup_trx_data();
722
  binlog_trx_data *const trx_data=
723
    (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
724
  DBUG_ASSERT(mysql_bin_log.is_open());
725
  *pos= trx_data->position();
726
  DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos));
727
  DBUG_VOID_RETURN;
728
}
729
730
731
/*
732
  Truncate the binary log transaction cache.
733
734
  SYNOPSIS
735
    binlog_trans_log_truncate()
736
737
    thd      The thread to take the binlog data from
738
    pos      Position to truncate to
739
740
  DESCRIPTION
741
742
    Truncate the binary log to the given position. Will not change
743
    anything else.
744
745
 */
746
static void
747
binlog_trans_log_truncate(THD *thd, my_off_t pos)
748
{
749
  DBUG_ENTER("binlog_trans_log_truncate");
750
  DBUG_PRINT("enter", ("pos: %lu", (ulong) pos));
751
752
  DBUG_ASSERT(thd_get_ha_data(thd, binlog_hton) != NULL);
753
  /* Only true if binlog_trans_log_savepos() wasn't called before */
754
  DBUG_ASSERT(pos != ~(my_off_t) 0);
755
756
  binlog_trx_data *const trx_data=
757
    (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
758
  trx_data->truncate(pos);
759
  DBUG_VOID_RETURN;
760
}
761
762
763
/*
764
  this function is mostly a placeholder.
765
  conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
766
  should be moved here.
767
*/
768
769
int binlog_init(void *p)
770
{
771
  binlog_hton= (handlerton *)p;
772
  binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
773
  binlog_hton->db_type=DB_TYPE_BINLOG;
774
  binlog_hton->savepoint_offset= sizeof(my_off_t);
775
  binlog_hton->close_connection= binlog_close_connection;
776
  binlog_hton->savepoint_set= binlog_savepoint_set;
777
  binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
778
  binlog_hton->commit= binlog_commit;
779
  binlog_hton->rollback= binlog_rollback;
780
  binlog_hton->prepare= binlog_prepare;
781
  binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
782
  return 0;
783
}
784
77.1.45 by Monty Taylor
Warning fixes.
785
static int binlog_close_connection(handlerton *hton __attribute__((__unused__)),
786
                                   THD *thd)
1 by brian
clean slate
787
{
788
  binlog_trx_data *const trx_data=
789
    (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
790
  DBUG_ASSERT(trx_data->empty());
791
  thd_set_ha_data(thd, binlog_hton, NULL);
792
  trx_data->~binlog_trx_data();
793
  my_free((uchar*)trx_data, MYF(0));
794
  return 0;
795
}
796
797
/*
798
  End a transaction.
799
800
  SYNOPSIS
801
    binlog_end_trans()
802
803
    thd      The thread whose transaction should be ended
804
    trx_data Pointer to the transaction data to use
805
    end_ev   The end event to use, or NULL
806
    all      True if the entire transaction should be ended, false if
807
             only the statement transaction should be ended.
808
809
  DESCRIPTION
810
811
    End the currently open transaction. The transaction can be either
812
    a real transaction (if 'all' is true) or a statement transaction
813
    (if 'all' is false).
814
815
    If 'end_ev' is NULL, the transaction is a rollback of only
816
    transactional tables, so the transaction cache will be truncated
817
    to either just before the last opened statement transaction (if
818
    'all' is false), or reset completely (if 'all' is true).
819
 */
820
static int
821
binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
822
                 Log_event *end_ev, bool all)
823
{
824
  DBUG_ENTER("binlog_end_trans");
825
  int error=0;
826
  IO_CACHE *trans_log= &trx_data->trans_log;
827
  DBUG_PRINT("enter", ("transaction: %s  end_ev: 0x%lx",
828
                       all ? "all" : "stmt", (long) end_ev));
829
  DBUG_PRINT("info", ("thd->options={ %s%s}",
830
                      FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
831
                      FLAGSTR(thd->options, OPTION_BEGIN)));
832
833
  /*
834
    NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of
835
    only transactional tables.  If the transaction contain changes to
836
    any non-transactiona tables, we need write the transaction and log
837
    a ROLLBACK last.
838
  */
839
  if (end_ev != NULL)
840
  {
841
    /*
842
      Doing a commit or a rollback including non-transactional tables,
843
      i.e., ending a transaction where we might write the transaction
844
      cache to the binary log.
845
846
      We can always end the statement when ending a transaction since
847
      transactions are not allowed inside stored functions.  If they
848
      were, we would have to ensure that we're not ending a statement
849
      inside a stored function.
850
     */
851
    thd->binlog_flush_pending_rows_event(TRUE);
852
853
    error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev);
854
    trx_data->reset();
855
856
    /*
857
      We need to step the table map version after writing the
858
      transaction cache to disk.
859
    */
860
    mysql_bin_log.update_table_map_version();
861
    statistic_increment(binlog_cache_use, &LOCK_status);
862
    if (trans_log->disk_writes != 0)
863
    {
864
      statistic_increment(binlog_cache_disk_use, &LOCK_status);
865
      trans_log->disk_writes= 0;
866
    }
867
  }
868
  else
869
  {
870
    /*
871
      If rolling back an entire transaction or a single statement not
872
      inside a transaction, we reset the transaction cache.
873
874
      If rolling back a statement in a transaction, we truncate the
875
      transaction cache to remove the statement.
876
     */
877
    if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
878
    {
879
      trx_data->reset();
880
881
      DBUG_ASSERT(!thd->binlog_get_pending_rows_event());
882
      thd->clear_binlog_table_maps();
883
    }
884
    else                                        // ...statement
885
      trx_data->truncate(trx_data->before_stmt_pos);
886
887
    /*
888
      We need to step the table map version on a rollback to ensure
889
      that a new table map event is generated instead of the one that
890
      was written to the thrown-away transaction cache.
891
    */
892
    mysql_bin_log.update_table_map_version();
893
  }
894
895
  DBUG_RETURN(error);
896
}
897
77.1.45 by Monty Taylor
Warning fixes.
898
static int binlog_prepare(handlerton *hton __attribute__((__unused__)),
899
                          THD *thd __attribute__((__unused__)),
900
                          bool all __attribute__((__unused__)))
1 by brian
clean slate
901
{
902
  /*
903
    do nothing.
904
    just pretend we can do 2pc, so that MySQL won't
905
    switch to 1pc.
906
    real work will be done in MYSQL_BIN_LOG::log_xid()
907
  */
908
  return 0;
909
}
910
911
#define YESNO(X) ((X) ? "yes" : "no")
912
913
/**
914
  This function is called once after each statement.
915
916
  It has the responsibility to flush the transaction cache to the
917
  binlog file on commits.
918
919
  @param hton  The binlog handlerton.
920
  @param thd   The client thread that executes the transaction.
921
  @param all   This is @c true if this is a real transaction commit, and
922
               @c false otherwise.
923
924
  @see handlerton::commit
925
*/
77.1.45 by Monty Taylor
Warning fixes.
926
static int binlog_commit(handlerton *hton __attribute__((__unused__)),
927
                         THD *thd, bool all)
1 by brian
clean slate
928
{
929
  DBUG_ENTER("binlog_commit");
930
  binlog_trx_data *const trx_data=
931
    (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
932
933
  if (trx_data->empty())
934
  {
935
    // we're here because trans_log was flushed in MYSQL_BIN_LOG::log_xid()
936
    trx_data->reset();
937
    DBUG_RETURN(0);
938
  }
939
940
  /*
941
    Decision table for committing a transaction. The top part, the
942
    *conditions* represent different cases that can occur, and hte
943
    bottom part, the *actions*, represent what should be done in that
944
    particular case.
945
946
    Real transaction        'all' was true
947
948
    Statement in cache      There were at least one statement in the
949
                            transaction cache
950
951
    In transaction          We are inside a transaction
952
953
    Stmt modified non-trans The statement being committed modified a
954
                            non-transactional table
955
956
    All modified non-trans  Some statement before this one in the
957
                            transaction modified a non-transactional
958
                            table
959
960
961
    =============================  = = = = = = = = = = = = = = = =
962
    Real transaction               N N N N N N N N N N N N N N N N
963
    Statement in cache             N N N N N N N N Y Y Y Y Y Y Y Y
964
    In transaction                 N N N N Y Y Y Y N N N N Y Y Y Y
965
    Stmt modified non-trans        N N Y Y N N Y Y N N Y Y N N Y Y
966
    All modified non-trans         N Y N Y N Y N Y N Y N Y N Y N Y
967
968
    Action: (C)ommit/(A)ccumulate  C C - C A C - C - - - - A A - A
969
    =============================  = = = = = = = = = = = = = = = =
970
971
972
    =============================  = = = = = = = = = = = = = = = =
973
    Real transaction               Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y
974
    Statement in cache             N N N N N N N N Y Y Y Y Y Y Y Y
975
    In transaction                 N N N N Y Y Y Y N N N N Y Y Y Y
976
    Stmt modified non-trans        N N Y Y N N Y Y N N Y Y N N Y Y
977
    All modified non-trans         N Y N Y N Y N Y N Y N Y N Y N Y
978
979
    (C)ommit/(A)ccumulate/(-)      - - - - C C - C - - - - C C - C
980
    =============================  = = = = = = = = = = = = = = = =
981
982
    In other words, we commit the transaction if and only if both of
983
    the following are true:
984
     - We are not in a transaction and committing a statement
985
986
     - We are in a transaction and one (or more) of the following are
987
       true:
988
989
       - A full transaction is committed
990
991
         OR
992
993
       - A non-transactional statement is committed and there is
994
         no statement cached
995
996
    Otherwise, we accumulate the statement
997
  */
998
  ulonglong const in_transaction=
999
    thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);
1000
  DBUG_PRINT("debug",
1001
             ("all: %d, empty: %s, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
1002
              all,
1003
              YESNO(trx_data->empty()),
1004
              YESNO(in_transaction),
1005
              YESNO(thd->transaction.all.modified_non_trans_table),
1006
              YESNO(thd->transaction.stmt.modified_non_trans_table)));
1007
  if ((in_transaction && (all || (!trx_data->at_least_one_stmt && thd->transaction.stmt.modified_non_trans_table))) || (!in_transaction && !all))
1008
  {
1009
    Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, FALSE);
1010
    qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE)
1011
    int error= binlog_end_trans(thd, trx_data, &qev, all);
1012
    DBUG_RETURN(error);
1013
  }
1014
  DBUG_RETURN(0);
1015
}
1016
1017
/**
1018
  This function is called when a transaction involving a transactional
1019
  table is rolled back.
1020
1021
  It has the responsibility to flush the transaction cache to the
1022
  binlog file. However, if the transaction does not involve
1023
  non-transactional tables, nothing needs to be logged.
1024
1025
  @param hton  The binlog handlerton.
1026
  @param thd   The client thread that executes the transaction.
1027
  @param all   This is @c true if this is a real transaction rollback, and
1028
               @c false otherwise.
1029
1030
  @see handlerton::rollback
1031
*/
77.1.45 by Monty Taylor
Warning fixes.
1032
static int binlog_rollback(handlerton *hton __attribute__((__unused__)),
1033
                           THD *thd, bool all)
1 by brian
clean slate
1034
{
1035
  DBUG_ENTER("binlog_rollback");
1036
  int error=0;
1037
  binlog_trx_data *const trx_data=
1038
    (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
1039
1040
  if (trx_data->empty()) {
1041
    trx_data->reset();
1042
    DBUG_RETURN(0);
1043
  }
1044
1045
  DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
1046
                       YESNO(all),
1047
                       YESNO(thd->transaction.all.modified_non_trans_table),
1048
                       YESNO(thd->transaction.stmt.modified_non_trans_table)));
1049
  if ((all && thd->transaction.all.modified_non_trans_table) ||
1050
      (!all && thd->transaction.stmt.modified_non_trans_table) ||
1051
      (thd->options & OPTION_KEEP_LOG))
1052
  {
1053
    /*
1054
      We write the transaction cache with a rollback last if we have
1055
      modified any non-transactional table. We do this even if we are
1056
      committing a single statement that has modified a
1057
      non-transactional table since it can have modified a
1058
      transactional table in that statement as well, which needs to be
1059
      rolled back on the slave.
1060
    */
1061
    Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, FALSE);
1062
    qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE)
1063
    error= binlog_end_trans(thd, trx_data, &qev, all);
1064
  }
1065
  else if ((all && !thd->transaction.all.modified_non_trans_table) ||
1066
           (!all && !thd->transaction.stmt.modified_non_trans_table))
1067
  {
1068
    /*
1069
      If we have modified only transactional tables, we can truncate
1070
      the transaction cache without writing anything to the binary
1071
      log.
1072
     */
1073
    error= binlog_end_trans(thd, trx_data, 0, all);
1074
  }
1075
  DBUG_RETURN(error);
1076
}
1077
1078
/**
1079
  @note
1080
  How do we handle this (unlikely but legal) case:
1081
  @verbatim
1082
    [transaction] + [update to non-trans table] + [rollback to savepoint] ?
1083
  @endverbatim
1084
  The problem occurs when a savepoint is before the update to the
1085
  non-transactional table. Then when there's a rollback to the savepoint, if we
1086
  simply truncate the binlog cache, we lose the part of the binlog cache where
1087
  the update is. If we want to not lose it, we need to write the SAVEPOINT
1088
  command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
1089
  is easy: it is simply written at the end of the binlog cache, but the former
1090
  should be *inserted* to the place where the user called SAVEPOINT. The
1091
  solution is that when the user calls SAVEPOINT, we write it to the binlog
1092
  cache (so no need to later insert it). As transactions are never intermixed
1093
  in the binary log (i.e. they are serialized), we won't have conflicts with
1094
  savepoint names when using mysqlbinlog or in the slave SQL thread.
1095
  Then when ROLLBACK TO SAVEPOINT is called, if we updated some
1096
  non-transactional table, we don't truncate the binlog cache but instead write
1097
  ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
1098
  will chop the SAVEPOINT command from the binlog cache, which is good as in
1099
  that case there is no need to have it in the binlog).
1100
*/
1101
77.1.45 by Monty Taylor
Warning fixes.
1102
static int binlog_savepoint_set(handlerton *hton __attribute__((__unused__)),
1103
                                THD *thd, void *sv)
1 by brian
clean slate
1104
{
1105
  DBUG_ENTER("binlog_savepoint_set");
1106
1107
  binlog_trans_log_savepos(thd, (my_off_t*) sv);
1108
  /* Write it to the binary log */
1109
  
1110
  int const error=
1111
    thd->binlog_query(THD::STMT_QUERY_TYPE,
1112
                      thd->query, thd->query_length, TRUE, FALSE);
1113
  DBUG_RETURN(error);
1114
}
1115
77.1.45 by Monty Taylor
Warning fixes.
1116
static int binlog_savepoint_rollback(handlerton *hton __attribute__((__unused__)),
1117
                                     THD *thd, void *sv)
1 by brian
clean slate
1118
{
1119
  DBUG_ENTER("binlog_savepoint_rollback");
1120
1121
  /*
1122
    Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
1123
    non-transactional table. Otherwise, truncate the binlog cache starting
1124
    from the SAVEPOINT command.
1125
  */
1126
  if (unlikely(thd->transaction.all.modified_non_trans_table || 
1127
               (thd->options & OPTION_KEEP_LOG)))
1128
  {
1129
    int error=
1130
      thd->binlog_query(THD::STMT_QUERY_TYPE,
1131
                        thd->query, thd->query_length, TRUE, FALSE);
1132
    DBUG_RETURN(error);
1133
  }
1134
  binlog_trans_log_truncate(thd, *(my_off_t*)sv);
1135
  DBUG_RETURN(0);
1136
}
1137
1138
1139
/*
  Read the first four bytes of a log and compare them with BINLOG_MAGIC.

  log      IO_CACHE to read from; asserted to be at position 0
  errmsg   set to a static message when the check fails

  Returns 0 when the magic matches, 1 on read error or mismatch.
*/
int check_binlog_magic(IO_CACHE* log, const char** errmsg)
{
  char header[4];
  DBUG_ASSERT(my_b_tell(log) == 0);

  if (my_b_read(log, (uchar*) header, sizeof(header)) != 0)
  {
    *errmsg = "I/O error reading the header from the binary log";
    sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
                    log->error);
    return 1;
  }

  if (memcmp(header, BINLOG_MAGIC, sizeof(header)) != 0)
  {
    *errmsg = "Binlog has bad magic number;  It's not a binary log file that can be used by this version of MySQL";
    return 1;
  }

  return 0;
}
1158
1159
1160
/*
  Open a binary log for reading and verify its magic header.

  log            IO_CACHE to initialise as a READ_CACHE on the file
  log_file_name  path of the log to open
  errmsg         set to a static message on failure

  Returns the file descriptor on success, -1 on failure.  On failure
  after the file was opened, the descriptor is closed and
  end_io_cache() is called on 'log'.
*/
File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
{
  DBUG_ENTER("open_binlog");

  File const fd= my_open(log_file_name, O_RDONLY | O_BINARY | O_SHARE,
                         MYF(MY_WME));
  if (fd < 0)
  {
    sql_print_error("Failed to open log (file '%s', errno %d)",
                    log_file_name, my_errno);
    *errmsg = "Could not open log file";
    DBUG_RETURN(-1);
  }

  if (init_io_cache(log, fd, IO_SIZE*2, READ_CACHE, 0, 0,
                    MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
  {
    sql_print_error("Failed to create a cache on log (file '%s')",
                    log_file_name);
    *errmsg = "Could not open log file";
  }
  else if (!check_binlog_magic(log, errmsg))
  {
    DBUG_RETURN(fd);
  }

  /* Cache creation or the magic check failed: release the file. */
  my_close(fd, MYF(0));
  end_io_cache(log);
  DBUG_RETURN(-1);
}
1193
1194
1195
/**
1196
  Find a unique filename for 'filename.#'.
1197
1198
  Set '#' to a number as low as possible.
1199
1200
  @return
1201
    nonzero if not possible to get unique filename
1202
*/
1203
1204
static int find_uniq_filename(char *name)
1205
{
1206
  long                  number;
1207
  uint                  i;
1208
  char                  buff[FN_REFLEN];
1209
  struct st_my_dir     *dir_info;
1210
  register struct fileinfo *file_info;
1211
  ulong                 max_found=0;
1212
  size_t		buf_length, length;
1213
  char			*start, *end;
1214
  DBUG_ENTER("find_uniq_filename");
1215
1216
  length= dirname_part(buff, name, &buf_length);
1217
  start=  name + length;
1218
  end=    strend(start);
1219
1220
  *end='.';
1221
  length= (size_t) (end-start+1);
1222
1223
  if (!(dir_info = my_dir(buff,MYF(MY_DONT_SORT))))
1224
  {						// This shouldn't happen
1225
    strmov(end,".1");				// use name+1
1226
    DBUG_RETURN(0);
1227
  }
1228
  file_info= dir_info->dir_entry;
1229
  for (i=dir_info->number_off_files ; i-- ; file_info++)
1230
  {
1231
    if (bcmp((uchar*) file_info->name, (uchar*) start, length) == 0 &&
1232
	test_if_number(file_info->name+length, &number,0))
1233
    {
1234
      set_if_bigger(max_found,(ulong) number);
1235
    }
1236
  }
1237
  my_dirend(dir_info);
1238
1239
  *end++='.';
1240
  sprintf(end,"%06ld",max_found+1);
1241
  DBUG_RETURN(0);
1242
}
1243
1244
1245
void MYSQL_LOG::init(enum_log_type log_type_arg,
1246
                     enum cache_type io_cache_type_arg)
1247
{
1248
  DBUG_ENTER("MYSQL_LOG::init");
1249
  log_type= log_type_arg;
1250
  io_cache_type= io_cache_type_arg;
1251
  DBUG_PRINT("info",("log_type: %d", log_type));
1252
  DBUG_VOID_RETURN;
1253
}
1254
1255
1256
/*
1257
  Open a (new) log file.
1258
1259
  SYNOPSIS
1260
    open()
1261
1262
    log_name            The name of the log to open
1263
    log_type_arg        The type of the log. E.g. LOG_NORMAL
1264
    new_name            The new name for the logfile. This is only needed
1265
                        when the method is used to open the binlog file.
1266
    io_cache_type_arg   The type of the IO_CACHE to use for this log file
1267
1268
  DESCRIPTION
1269
    Open the logfile, init IO_CACHE and write startup messages
1270
    (in case of general and slow query logs).
1271
1272
  RETURN VALUES
1273
    0   ok
1274
    1   error
1275
*/
1276
1277
bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
1278
                     const char *new_name, enum cache_type io_cache_type_arg)
1279
{
1280
  char buff[FN_REFLEN];
1281
  File file= -1;
1282
  int open_flags= O_CREAT | O_BINARY;
1283
  DBUG_ENTER("MYSQL_LOG::open");
1284
  DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg));
1285
1286
  write_error= 0;
1287
1288
  init(log_type_arg, io_cache_type_arg);
1289
1290
  if (!(name= my_strdup(log_name, MYF(MY_WME))))
1291
  {
1292
    name= (char *)log_name; // for the error message
1293
    goto err;
1294
  }
1295
1296
  if (new_name)
1297
    strmov(log_file_name, new_name);
1298
  else if (generate_new_name(log_file_name, name))
1299
    goto err;
1300
1301
  if (io_cache_type == SEQ_READ_APPEND)
1302
    open_flags |= O_RDWR | O_APPEND;
1303
  else
1304
    open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
1305
1306
  db[0]= 0;
1307
1308
  if ((file= my_open(log_file_name, open_flags,
1309
                     MYF(MY_WME | ME_WAITTANG))) < 0 ||
1310
      init_io_cache(&log_file, file, IO_SIZE, io_cache_type,
1311
                    my_tell(file, MYF(MY_WME)), 0,
1312
                    MYF(MY_WME | MY_NABP |
1313
                        ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
1314
    goto err;
1315
1316
  if (log_type == LOG_NORMAL)
1317
  {
1318
    char *end;
77.1.18 by Monty Taylor
Removed my_vsnprintf and my_snprintf.
1319
    int len=snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
1320
		     "started with:\nTCP Port: %d, Named Pipe: %s\n",
1321
                     my_progname, server_version, MYSQL_COMPILATION_COMMENT,
1322
                     mysqld_port, ""
1323
                     );
1 by brian
clean slate
1324
    end= strnmov(buff + len, "Time                 Id Command    Argument\n",
1325
                 sizeof(buff) - len);
1326
    if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) ||
1327
	flush_io_cache(&log_file))
1328
      goto err;
1329
  }
1330
1331
  log_state= LOG_OPENED;
1332
  DBUG_RETURN(0);
1333
1334
err:
1335
  sql_print_error("Could not use %s for logging (error %d). \
1336
Turning logging off for the whole duration of the MySQL server process. \
1337
To turn it on again: fix the cause, \
1338
shutdown the MySQL server and restart it.", name, errno);
1339
  if (file >= 0)
1340
    my_close(file, MYF(0));
1341
  end_io_cache(&log_file);
1342
  safeFree(name);
1343
  log_state= LOG_CLOSED;
1344
  DBUG_RETURN(1);
1345
}
1346
1347
/* Construct a closed log of unknown type with a zeroed IO_CACHE. */
MYSQL_LOG::MYSQL_LOG()
  : name(0),
    write_error(FALSE),
    inited(FALSE),
    log_type(LOG_UNKNOWN),
    log_state(LOG_CLOSED)
{
  /*
    LOCK_log is intentionally not initialized here.  Its initialization
    depends on safe_mutex (when safe_mutex is used), which in turn depends
    on MY_INIT(), and that is called only in main().  Initializing the
    mutex in this constructor would make it happen before main().
  */
  bzero((char*) &log_file, sizeof(log_file));
}
1359
1360
void MYSQL_LOG::init_pthread_objects()
1361
{
1362
  DBUG_ASSERT(inited == 0);
1363
  inited= 1;
1364
  (void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
1365
}
1366
1367
/*
1368
  Close the log file
1369
1370
  SYNOPSIS
1371
    close()
1372
    exiting     Bitmask. For the slow and general logs the only used bit is
1373
                LOG_CLOSE_TO_BE_OPENED. This is used if we intend to call
1374
                open at once after close.
1375
1376
  NOTES
1377
    One can do an open on the object at once after doing a close.
1378
    The internal structures are not freed until cleanup() is called
1379
*/
1380
1381
void MYSQL_LOG::close(uint exiting)
1382
{					// One can't set log_type here!
1383
  DBUG_ENTER("MYSQL_LOG::close");
1384
  DBUG_PRINT("enter",("exiting: %d", (int) exiting));
1385
  if (log_state == LOG_OPENED)
1386
  {
1387
    end_io_cache(&log_file);
1388
1389
    if (my_sync(log_file.file, MYF(MY_WME)) && ! write_error)
1390
    {
1391
      write_error= 1;
1392
      sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1393
    }
1394
1395
    if (my_close(log_file.file, MYF(MY_WME)) && ! write_error)
1396
    {
1397
      write_error= 1;
1398
      sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1399
    }
1400
  }
1401
1402
  log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
1403
  safeFree(name);
1404
  DBUG_VOID_RETURN;
1405
}
1406
1407
/** This is called only once. */
1408
1409
void MYSQL_LOG::cleanup()
1410
{
1411
  DBUG_ENTER("cleanup");
1412
  if (inited)
1413
  {
1414
    inited= 0;
1415
    (void) pthread_mutex_destroy(&LOCK_log);
1416
    close(0);
1417
  }
1418
  DBUG_VOID_RETURN;
1419
}
1420
1421
1422
int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name)
1423
{
1424
  fn_format(new_name, log_name, mysql_data_home, "", 4);
1425
  if (log_type == LOG_BIN)
1426
  {
1427
    if (!fn_ext(log_name)[0])
1428
    {
1429
      if (find_uniq_filename(new_name))
1430
      {
1431
	sql_print_error(ER(ER_NO_UNIQUE_LOGFILE), log_name);
1432
	return 1;
1433
      }
1434
    }
1435
  }
1436
  return 0;
1437
}
1438
1439
1440
/*
1441
  Reopen the log file
1442
1443
  SYNOPSIS
1444
    reopen_file()
1445
1446
  DESCRIPTION
1447
    Reopen the log file. The method is used during FLUSH LOGS
1448
    and locks LOCK_log mutex
1449
*/
1450
1451
1452
void MYSQL_QUERY_LOG::reopen_file()
1453
{
1454
  char *save_name;
1455
1456
  DBUG_ENTER("MYSQL_LOG::reopen_file");
1457
  if (!is_open())
1458
  {
1459
    DBUG_PRINT("info",("log is closed"));
1460
    DBUG_VOID_RETURN;
1461
  }
1462
1463
  pthread_mutex_lock(&LOCK_log);
1464
1465
  save_name= name;
1466
  name= 0;				// Don't free name
1467
  close(LOG_CLOSE_TO_BE_OPENED);
1468
1469
  /*
1470
     Note that at this point, log_state != LOG_CLOSED (important for is_open()).
1471
  */
1472
1473
  open(save_name, log_type, 0, io_cache_type);
1474
  my_free(save_name, MYF(0));
1475
1476
  pthread_mutex_unlock(&LOCK_log);
1477
1478
  DBUG_VOID_RETURN;
1479
}
1480
1481
1482
/*
1483
  Write a command to traditional general log file
1484
1485
  SYNOPSIS
1486
    write()
1487
1488
    event_time        command start timestamp
1489
    user_host         the pointer to the string with user@host info
1490
    user_host_len     length of the user_host string. this is computed once
1491
                      and passed to all general log  event handlers
1492
    thread_id         Id of the thread, issued a query
1493
    command_type      the type of the command being logged
1494
    command_type_len  the length of the string above
1495
    sql_text          the very text of the query being executed
1496
    sql_text_len      the length of sql_text string
1497
1498
  DESCRIPTION
1499
1500
   Log given command to to normal (not rotable) log file
1501
1502
  RETURN
1503
    FASE - OK
1504
    TRUE - error occured
1505
*/
1506
77.1.45 by Monty Taylor
Warning fixes.
1507
bool MYSQL_QUERY_LOG::write(time_t event_time,
1508
                            const char *user_host __attribute__((__unused__)),
1509
                            uint user_host_len __attribute__((__unused__)),
1510
                            int thread_id,
1 by brian
clean slate
1511
                            const char *command_type, uint command_type_len,
1512
                            const char *sql_text, uint sql_text_len)
1513
{
1514
  char buff[32];
1515
  uint length= 0;
1516
  char local_time_buff[MAX_TIME_SIZE];
1517
  struct tm start;
1518
  uint time_buff_len= 0;
1519
1520
  (void) pthread_mutex_lock(&LOCK_log);
1521
1522
  /* Test if someone closed between the is_open test and lock */
1523
  if (is_open())
1524
  {
1525
    /* Note that my_b_write() assumes it knows the length for this */
1526
      if (event_time != last_time)
1527
      {
1528
        last_time= event_time;
1529
1530
        localtime_r(&event_time, &start);
1531
77.1.18 by Monty Taylor
Removed my_vsnprintf and my_snprintf.
1532
        time_buff_len= snprintf(local_time_buff, MAX_TIME_SIZE,
1533
                                "%02d%02d%02d %2d:%02d:%02d",
1534
                                start.tm_year % 100, start.tm_mon + 1,
1535
                                start.tm_mday, start.tm_hour,
1536
                                start.tm_min, start.tm_sec);
1 by brian
clean slate
1537
1538
        if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
1539
          goto err;
1540
      }
1541
      else
1542
        if (my_b_write(&log_file, (uchar*) "\t\t" ,2) < 0)
1543
          goto err;
1544
1545
      /* command_type, thread_id */
77.1.18 by Monty Taylor
Removed my_vsnprintf and my_snprintf.
1546
      length= snprintf(buff, 32, "%5ld ", (long) thread_id);
1 by brian
clean slate
1547
1548
    if (my_b_write(&log_file, (uchar*) buff, length))
1549
      goto err;
1550
1551
    if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
1552
      goto err;
1553
1554
    if (my_b_write(&log_file, (uchar*) "\t", 1))
1555
      goto err;
1556
1557
    /* sql_text */
1558
    if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
1559
      goto err;
1560
1561
    if (my_b_write(&log_file, (uchar*) "\n", 1) ||
1562
        flush_io_cache(&log_file))
1563
      goto err;
1564
  }
1565
1566
  (void) pthread_mutex_unlock(&LOCK_log);
1567
  return FALSE;
1568
err:
1569
1570
  if (!write_error)
1571
  {
1572
    write_error= 1;
1573
    sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1574
  }
1575
  (void) pthread_mutex_unlock(&LOCK_log);
1576
  return TRUE;
1577
}
1578
1579
1580
/*
1581
  Log a query to the traditional slow log file
1582
1583
  SYNOPSIS
1584
    write()
1585
1586
    thd               THD of the query
1587
    current_time      current timestamp
1588
    query_start_arg   command start timestamp
1589
    user_host         the pointer to the string with user@host info
1590
    user_host_len     length of the user_host string. this is computed once
1591
                      and passed to all general log event handlers
1592
    query_utime       Amount of time the query took to execute (in microseconds)
1593
    lock_utime        Amount of time the query was locked (in microseconds)
1594
    is_command        The flag, which determines, whether the sql_text is a
1595
                      query or an administrator command.
1596
    sql_text          the very text of the query or administrator command
1597
                      processed
1598
    sql_text_len      the length of sql_text string
1599
1600
  DESCRIPTION
1601
1602
   Log a query to the slow log file.
1603
1604
  RETURN
1605
    FALSE - OK
1606
    TRUE - error occured
1607
*/
1608
1609
bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
77.1.45 by Monty Taylor
Warning fixes.
1610
                            time_t query_start_arg __attribute__((__unused__)),
1611
                            const char *user_host,
1 by brian
clean slate
1612
                            uint user_host_len, ulonglong query_utime,
1613
                            ulonglong lock_utime, bool is_command,
1614
                            const char *sql_text, uint sql_text_len)
1615
{
1616
  bool error= 0;
1617
  DBUG_ENTER("MYSQL_QUERY_LOG::write");
1618
1619
  (void) pthread_mutex_lock(&LOCK_log);
1620
1621
  if (!is_open())
1622
  {
1623
    (void) pthread_mutex_unlock(&LOCK_log);
1624
    DBUG_RETURN(0);
1625
  }
1626
1627
  if (is_open())
1628
  {						// Safety agains reopen
1629
    int tmp_errno= 0;
1630
    char buff[80], *end;
1631
    char query_time_buff[22+7], lock_time_buff[22+7];
1632
    uint buff_len;
1633
    end= buff;
1634
1635
    if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
1636
    {
1637
      if (current_time != last_time)
1638
      {
1639
        last_time= current_time;
1640
        struct tm start;
1641
        localtime_r(&current_time, &start);
1642
77.1.18 by Monty Taylor
Removed my_vsnprintf and my_snprintf.
1643
        buff_len= snprintf(buff, sizeof buff,
1644
                           "# Time: %02d%02d%02d %2d:%02d:%02d\n",
1645
                           start.tm_year % 100, start.tm_mon + 1,
1646
                           start.tm_mday, start.tm_hour,
1647
                           start.tm_min, start.tm_sec);
1 by brian
clean slate
1648
1649
        /* Note that my_b_write() assumes it knows the length for this */
1650
        if (my_b_write(&log_file, (uchar*) buff, buff_len))
1651
          tmp_errno= errno;
1652
      }
1653
      const uchar uh[]= "# User@Host: ";
1654
      if (my_b_write(&log_file, uh, sizeof(uh) - 1))
1655
        tmp_errno= errno;
1656
      if (my_b_write(&log_file, (uchar*) user_host, user_host_len))
1657
        tmp_errno= errno;
1658
      if (my_b_write(&log_file, (uchar*) "\n", 1))
1659
        tmp_errno= errno;
1660
    }
1661
    /* For slow query log */
1662
    sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
1663
    sprintf(lock_time_buff,  "%.6f", ulonglong2double(lock_utime)/1000000.0);
1664
    if (my_b_printf(&log_file,
1665
                    "# Query_time: %s  Lock_time: %s"
1666
                    " Rows_sent: %lu  Rows_examined: %lu\n",
1667
                    query_time_buff, lock_time_buff,
1668
                    (ulong) thd->sent_row_count,
1669
                    (ulong) thd->examined_row_count) == (uint) -1)
1670
      tmp_errno= errno;
1671
    if (thd->db && strcmp(thd->db, db))
1672
    {						// Database changed
1673
      if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1)
1674
        tmp_errno= errno;
1675
      strmov(db,thd->db);
1676
    }
1677
    if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
1678
    {
1679
      end=strmov(end, ",last_insert_id=");
1680
      end=longlong10_to_str((longlong)
1681
                            thd->first_successful_insert_id_in_prev_stmt_for_binlog,
1682
                            end, -10);
1683
    }
1684
    // Save value if we do an insert.
1685
    if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
1686
    {
1687
      if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
1688
      {
1689
        end=strmov(end,",insert_id=");
1690
        end=longlong10_to_str((longlong)
1691
                              thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
1692
                              end, -10);
1693
      }
1694
    }
1695
1696
    /*
1697
      This info used to show up randomly, depending on whether the query
1698
      checked the query start time or not. now we always write current
1699
      timestamp to the slow log
1700
    */
1701
    end= strmov(end, ",timestamp=");
1702
    end= int10_to_str((long) current_time, end, 10);
1703
1704
    if (end != buff)
1705
    {
1706
      *end++=';';
1707
      *end='\n';
1708
      if (my_b_write(&log_file, (uchar*) "SET ", 4) ||
1709
          my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff)))
1710
        tmp_errno= errno;
1711
    }
1712
    if (is_command)
1713
    {
1714
      end= strxmov(buff, "# administrator command: ", NullS);
1715
      buff_len= (ulong) (end - buff);
1716
      my_b_write(&log_file, (uchar*) buff, buff_len);
1717
    }
1718
    if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) ||
1719
        my_b_write(&log_file, (uchar*) ";\n",2) ||
1720
        flush_io_cache(&log_file))
1721
      tmp_errno= errno;
1722
    if (tmp_errno)
1723
    {
1724
      error= 1;
1725
      if (! write_error)
1726
      {
1727
        write_error= 1;
1728
        sql_print_error(ER(ER_ERROR_ON_WRITE), name, error);
1729
      }
1730
    }
1731
  }
1732
  (void) pthread_mutex_unlock(&LOCK_log);
1733
  DBUG_RETURN(error);
1734
}
1735
1736
1737
/**
1738
  @todo
1739
  The following should be using fn_format();  We just need to
1740
  first change fn_format() to cut the file name if it's too long.
1741
*/
1742
const char *MYSQL_LOG::generate_name(const char *log_name,
1743
                                      const char *suffix,
1744
                                      bool strip_ext, char *buff)
1745
{
1746
  if (!log_name || !log_name[0])
1747
  {
1748
    strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
1749
    return (const char *)
1750
      fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
1751
  }
1752
  // get rid of extension if the log is binary to avoid problems
1753
  if (strip_ext)
1754
  {
1755
    char *p= fn_ext(log_name);
1756
    uint length= (uint) (p - log_name);
1757
    strmake(buff, log_name, min(length, FN_REFLEN));
1758
    return (const char*)buff;
1759
  }
1760
  return log_name;
1761
}
1762
1763
1764
1765
/* Construct a binary log object with counters reset and no index file. */
MYSQL_BIN_LOG::MYSQL_BIN_LOG()
  : bytes_written(0),
    prepared_xids(0),
    file_id(1),
    open_count(1),
    need_start_event(TRUE),
    m_table_map_version(0),
    description_event_for_exec(0),
    description_event_for_queue(0)
{
  /*
    The locks are intentionally not initialized here.  Their
    initialization depends on safe_mutex (when safe_mutex is used), which
    in turn depends on MY_INIT(), and that is called only in main().
    Initializing them in this constructor would make it happen before
    main().
  */
  bzero((char*) &index_file, sizeof(index_file));
  index_file_name[0] = 0;
}
1779
1780
/* this is called only once */
1781
1782
void MYSQL_BIN_LOG::cleanup()
1783
{
1784
  DBUG_ENTER("cleanup");
1785
  if (inited)
1786
  {
1787
    inited= 0;
1788
    close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
1789
    delete description_event_for_queue;
1790
    delete description_event_for_exec;
1791
    (void) pthread_mutex_destroy(&LOCK_log);
1792
    (void) pthread_mutex_destroy(&LOCK_index);
1793
    (void) pthread_cond_destroy(&update_cond);
1794
  }
1795
  DBUG_VOID_RETURN;
1796
}
1797
1798
1799
/* Init binlog-specific vars */
1800
void MYSQL_BIN_LOG::init(bool no_auto_events_arg, ulong max_size_arg)
1801
{
1802
  DBUG_ENTER("MYSQL_BIN_LOG::init");
1803
  no_auto_events= no_auto_events_arg;
1804
  max_size= max_size_arg;
1805
  DBUG_PRINT("info",("max_size: %lu", max_size));
1806
  DBUG_VOID_RETURN;
1807
}
1808
1809
1810
void MYSQL_BIN_LOG::init_pthread_objects()
1811
{
1812
  DBUG_ASSERT(inited == 0);
1813
  inited= 1;
1814
  (void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
1815
  (void) pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW);
1816
  (void) pthread_cond_init(&update_cond, 0);
1817
}
1818
1819
1820
bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
1821
                                const char *log_name)
1822
{
1823
  File index_file_nr= -1;
1824
  DBUG_ASSERT(!my_b_inited(&index_file));
1825
1826
  /*
1827
    First open of this class instance
1828
    Create an index file that will hold all file names uses for logging.
1829
    Add new entries to the end of it.
1830
  */
1831
  myf opt= MY_UNPACK_FILENAME;
1832
  if (!index_file_name_arg)
1833
  {
1834
    index_file_name_arg= log_name;    // Use same basename for index file
1835
    opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
1836
  }
1837
  fn_format(index_file_name, index_file_name_arg, mysql_data_home,
1838
            ".index", opt);
1839
  if ((index_file_nr= my_open(index_file_name,
1840
                              O_RDWR | O_CREAT | O_BINARY ,
1841
                              MYF(MY_WME))) < 0 ||
1842
       my_sync(index_file_nr, MYF(MY_WME)) ||
1843
       init_io_cache(&index_file, index_file_nr,
1844
                     IO_SIZE, WRITE_CACHE,
1845
                     my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
1846
			0, MYF(MY_WME | MY_WAIT_IF_FULL)))
1847
  {
1848
    /*
1849
      TODO: all operations creating/deleting the index file or a log, should
1850
      call my_sync_dir() or my_sync_dir_by_file() to be durable.
1851
      TODO: file creation should be done with my_create() not my_open().
1852
    */
1853
    if (index_file_nr >= 0)
1854
      my_close(index_file_nr,MYF(0));
1855
    return TRUE;
1856
  }
1857
  return FALSE;
1858
}
1859
1860
1861
/**
1862
  Open a (new) binlog file.
1863
1864
  - Open the log file and the index file. Register the new
1865
  file name in it
1866
  - When calling this when the file is in use, you must have a locks
1867
  on LOCK_log and LOCK_index.
1868
1869
  @retval
1870
    0	ok
1871
  @retval
1872
    1	error
1873
*/
1874
1875
bool MYSQL_BIN_LOG::open(const char *log_name,
1876
                         enum_log_type log_type_arg,
1877
                         const char *new_name,
1878
                         enum cache_type io_cache_type_arg,
1879
                         bool no_auto_events_arg,
1880
                         ulong max_size_arg,
1881
                         bool null_created_arg)
1882
{
1883
  File file= -1;
1884
  DBUG_ENTER("MYSQL_BIN_LOG::open");
1885
  DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
1886
1887
  write_error=0;
1888
1889
  /* open the main log file */
1890
  if (MYSQL_LOG::open(log_name, log_type_arg, new_name, io_cache_type_arg))
1891
    DBUG_RETURN(1);                            /* all warnings issued */
1892
1893
  init(no_auto_events_arg, max_size_arg);
1894
1895
  open_count++;
1896
1897
  DBUG_ASSERT(log_type == LOG_BIN);
1898
1899
  {
1900
    bool write_file_name_to_index_file=0;
1901
1902
    if (!my_b_filelength(&log_file))
1903
    {
1904
      /*
1905
	The binary log file was empty (probably newly created)
1906
	This is the normal case and happens when the user doesn't specify
1907
	an extension for the binary log files.
1908
	In this case we write a standard header to it.
1909
      */
1910
      if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
1911
			  BIN_LOG_HEADER_SIZE))
1912
        goto err;
1913
      bytes_written+= BIN_LOG_HEADER_SIZE;
1914
      write_file_name_to_index_file= 1;
1915
    }
1916
1917
    DBUG_ASSERT(my_b_inited(&index_file) != 0);
1918
    reinit_io_cache(&index_file, WRITE_CACHE,
1919
                    my_b_filelength(&index_file), 0, 0);
1920
    if (need_start_event && !no_auto_events)
1921
    {
1922
      /*
1923
        In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event
1924
        even if this is not the very first binlog.
1925
      */
1926
      Format_description_log_event s(BINLOG_VERSION);
1927
      /*
1928
        don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
1929
        as we won't be able to reset it later
1930
      */
1931
      if (io_cache_type == WRITE_CACHE)
1932
        s.flags|= LOG_EVENT_BINLOG_IN_USE_F;
1933
      if (!s.is_valid())
1934
        goto err;
1935
      s.dont_set_created= null_created_arg;
1936
      if (s.write(&log_file))
1937
        goto err;
1938
      bytes_written+= s.data_written;
1939
    }
1940
    if (description_event_for_queue &&
1941
        description_event_for_queue->binlog_version>=4)
1942
    {
1943
      /*
1944
        This is a relay log written to by the I/O slave thread.
1945
        Write the event so that others can later know the format of this relay
1946
        log.
1947
        Note that this event is very close to the original event from the
1948
        master (it has binlog version of the master, event types of the
1949
        master), so this is suitable to parse the next relay log's event. It
1950
        has been produced by
1951
        Format_description_log_event::Format_description_log_event(char* buf,).
1952
        Why don't we want to write the description_event_for_queue if this
1953
        event is for format<4 (3.23 or 4.x): this is because in that case, the
1954
        description_event_for_queue describes the data received from the
1955
        master, but not the data written to the relay log (*conversion*),
1956
        which is in format 4 (slave's).
1957
      */
1958
      /*
1959
        Set 'created' to 0, so that in next relay logs this event does not
1960
        trigger cleaning actions on the slave in
1961
        Format_description_log_event::apply_event_impl().
1962
      */
1963
      description_event_for_queue->created= 0;
1964
      /* Don't set log_pos in event header */
1965
      description_event_for_queue->artificial_event=1;
1966
1967
      if (description_event_for_queue->write(&log_file))
1968
        goto err;
1969
      bytes_written+= description_event_for_queue->data_written;
1970
    }
1971
    if (flush_io_cache(&log_file) ||
1972
        my_sync(log_file.file, MYF(MY_WME)))
1973
      goto err;
1974
1975
    if (write_file_name_to_index_file)
1976
    {
1977
      /*
1978
        As this is a new log file, we write the file name to the index
1979
        file. As every time we write to the index file, we sync it.
1980
      */
1981
      if (my_b_write(&index_file, (uchar*) log_file_name,
1982
		     strlen(log_file_name)) ||
1983
	  my_b_write(&index_file, (uchar*) "\n", 1) ||
1984
	  flush_io_cache(&index_file) ||
1985
          my_sync(index_file.file, MYF(MY_WME)))
1986
	goto err;
1987
    }
1988
  }
1989
  log_state= LOG_OPENED;
1990
1991
  DBUG_RETURN(0);
1992
1993
err:
1994
  sql_print_error("Could not use %s for logging (error %d). \
1995
Turning logging off for the whole duration of the MySQL server process. \
1996
To turn it on again: fix the cause, \
1997
shutdown the MySQL server and restart it.", name, errno);
1998
  if (file >= 0)
1999
    my_close(file,MYF(0));
2000
  end_io_cache(&log_file);
2001
  end_io_cache(&index_file);
2002
  safeFree(name);
2003
  log_state= LOG_CLOSED;
2004
  DBUG_RETURN(1);
2005
}
2006
2007
2008
/*
  Locked variant of raw_get_current_log(): takes LOCK_log around the
  call and hands back whatever the raw variant returned.
*/
int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
{
  int result;

  pthread_mutex_lock(&LOCK_log);
  result= raw_get_current_log(linfo);
  pthread_mutex_unlock(&LOCK_log);

  return result;
}
2015
2016
/*
  Copy the current log file name and the current position in log_file
  into linfo. Takes no lock itself. Always returns 0.
*/
int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
{
  /* Leave room for the terminator that strmake() appends. */
  const size_t max_name_len= sizeof(linfo->log_file_name) - 1;

  strmake(linfo->log_file_name, log_file_name, max_name_len);
  linfo->pos= my_b_tell(&log_file);
  return 0;
}
2022
2023
/**
  Move all data in a file-name index file up towards its beginning.

    We do the copy outside of the IO_CACHE as the cache buffers would just
    make things slower and more complicated.
    In most cases the copy loop should only do one read.

  @param index_file			File to move
  @param offset			Move everything from here to the beginning

  @note
    The file will be truncated to be 'offset' bytes shorter, or filled up
    with newlines.

  @retval
    0	ok
  @retval
    1	error
*/
2039
2040
#ifdef HAVE_REPLICATION
2041
2042
static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
{
  /* Distance by which every byte is moved towards the file start. */
  const my_off_t shift= offset;
  File fd= index_file->file;
  uchar chunk[IO_SIZE*2];
  int got;
  DBUG_ENTER("copy_up_file_and_fill");

  for (;;)
  {
    /* Read the next chunk from its current (old) location. */
    (void) my_seek(fd, offset, MY_SEEK_SET, MYF(0));
    got= (int) my_read(fd, chunk, sizeof(chunk), MYF(MY_WME));
    if (got < 0)
      DBUG_RETURN(1);
    if (got == 0)
      break;					// end of file

    /* Write it back 'shift' bytes earlier. */
    (void) my_seek(fd, offset - shift, MY_SEEK_SET, MYF(0));
    if (my_write(fd, chunk, got, MYF(MY_WME | MY_NABP)))
      DBUG_RETURN(1);

    offset+= got;
  }

  /* Cut off the now-stale tail of the file and make the result durable. */
  if (ftruncate(fd, offset - shift) || my_sync(fd, MYF(MY_WME)))
    DBUG_RETURN(1);

  /* Reset data in old index cache */
  reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
  DBUG_RETURN(0);
}
2073
2074
#endif /* HAVE_REPLICATION */
2075
2076
/**
2077
  Find the position in the log-index-file for the given log name.
2078
2079
  @param linfo		Store here the found log file name and position to
2080
                       the NEXT log file name in the index file.
2081
  @param log_name	Filename to find in the index file.
2082
                       Is a null pointer if we want to read the first entry
2083
  @param need_lock	Set this to 1 if the parent doesn't already have a
2084
                       lock on LOCK_index
2085
2086
  @note
2087
    On systems without the truncate function the file will end with one or
2088
    more empty lines.  These will be ignored when reading the file.
2089
2090
  @retval
2091
    0			ok
2092
  @retval
2093
    LOG_INFO_EOF	        End of log-index-file found
2094
  @retval
2095
    LOG_INFO_IO		Got IO error while reading file
2096
*/
2097
2098
int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
2099
			    bool need_lock)
2100
{
2101
  int error= 0;
2102
  char *fname= linfo->log_file_name;
2103
  uint log_name_len= log_name ? (uint) strlen(log_name) : 0;
2104
  DBUG_ENTER("find_log_pos");
2105
  DBUG_PRINT("enter",("log_name: %s", log_name ? log_name : "NULL"));
2106
2107
  /*
2108
    Mutex needed because we need to make sure the file pointer does not
2109
    move from under our feet
2110
  */
2111
  if (need_lock)
2112
    pthread_mutex_lock(&LOCK_index);
2113
  safe_mutex_assert_owner(&LOCK_index);
2114
2115
  /* As the file is flushed, we can't get an error here */
2116
  (void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
2117
2118
  for (;;)
2119
  {
2120
    uint length;
2121
    my_off_t offset= my_b_tell(&index_file);
2122
    /* If we get 0 or 1 characters, this is the end of the file */
2123
2124
    if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
2125
    {
2126
      /* Did not find the given entry; Return not found or error */
2127
      error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
2128
      break;
2129
    }
2130
2131
    // if the log entry matches, null string matching anything
2132
    if (!log_name ||
2133
	(log_name_len == length-1 && fname[log_name_len] == '\n' &&
2134
	 !memcmp(fname, log_name, log_name_len)))
2135
    {
2136
      DBUG_PRINT("info",("Found log file entry"));
2137
      fname[length-1]=0;			// remove last \n
2138
      linfo->index_file_start_offset= offset;
2139
      linfo->index_file_offset = my_b_tell(&index_file);
2140
      break;
2141
    }
2142
  }
2143
2144
  if (need_lock)
2145
    pthread_mutex_unlock(&LOCK_index);
2146
  DBUG_RETURN(error);
2147
}
2148
2149
2150
/**
2151
  Find the position in the log-index-file for the given log name.
2152
2153
  @param
2154
    linfo		Store here the next log file name and position to
2155
			the file name after that.
2156
  @param
2157
    need_lock		Set this to 1 if the parent doesn't already have a
2158
			lock on LOCK_index
2159
2160
  @note
2161
    - Before calling this function, one has to call find_log_pos()
2162
    to set up 'linfo'
2163
    - Mutex needed because we need to make sure the file pointer does not move
2164
    from under our feet
2165
2166
  @retval
2167
    0			ok
2168
  @retval
2169
    LOG_INFO_EOF	        End of log-index-file found
2170
  @retval
2171
    LOG_INFO_IO		Got IO error while reading file
2172
*/
2173
2174
int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
2175
{
2176
  int error= 0;
2177
  uint length;
2178
  char *fname= linfo->log_file_name;
2179
2180
  if (need_lock)
2181
    pthread_mutex_lock(&LOCK_index);
2182
  safe_mutex_assert_owner(&LOCK_index);
2183
2184
  /* As the file is flushed, we can't get an error here */
2185
  (void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
2186
			 0);
2187
2188
  linfo->index_file_start_offset= linfo->index_file_offset;
2189
  if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
2190
  {
2191
    error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
2192
    goto err;
2193
  }
2194
  fname[length-1]=0;				// kill \n
2195
  linfo->index_file_offset = my_b_tell(&index_file);
2196
2197
err:
2198
  if (need_lock)
2199
    pthread_mutex_unlock(&LOCK_index);
2200
  return error;
2201
}
2202
2203
2204
/**
2205
  Delete all logs refered to in the index file.
2206
  Start writing to a new log file.
2207
2208
  The new index file will only contain this file.
2209
2210
  @param thd		Thread
2211
2212
  @note
2213
    If not called from slave thread, write start event to new log
2214
2215
  @retval
2216
    0	ok
2217
  @retval
2218
    1   error
2219
*/
2220
2221
bool MYSQL_BIN_LOG::reset_logs(THD* thd)
2222
{
2223
  LOG_INFO linfo;
2224
  bool error=0;
2225
  const char* save_name;
2226
  DBUG_ENTER("reset_logs");
2227
2228
  ha_reset_logs(thd);
2229
  /*
2230
    We need to get both locks to be sure that no one is trying to
2231
    write to the index log file.
2232
  */
2233
  pthread_mutex_lock(&LOCK_log);
2234
  pthread_mutex_lock(&LOCK_index);
2235
2236
  /*
2237
    The following mutex is needed to ensure that no threads call
2238
    'delete thd' as we would then risk missing a 'rollback' from this
2239
    thread. If the transaction involved MyISAM tables, it should go
2240
    into binlog even on rollback.
2241
  */
2242
  VOID(pthread_mutex_lock(&LOCK_thread_count));
2243
2244
  /* Save variables so that we can reopen the log */
2245
  save_name=name;
2246
  name=0;					// Protect against free
2247
  close(LOG_CLOSE_TO_BE_OPENED);
2248
2249
  /* First delete all old log files */
2250
2251
  if (find_log_pos(&linfo, NullS, 0))
2252
  {
2253
    error=1;
2254
    goto err;
2255
  }
2256
2257
  for (;;)
2258
  {
2259
    if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
2260
    {
2261
      if (my_errno == ENOENT) 
2262
      {
2263
        push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2264
                            ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2265
                            linfo.log_file_name);
2266
        sql_print_information("Failed to delete file '%s'",
2267
                              linfo.log_file_name);
2268
        my_errno= 0;
2269
        error= 0;
2270
      }
2271
      else
2272
      {
2273
        push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2274
                            ER_BINLOG_PURGE_FATAL_ERR,
2275
                            "a problem with deleting %s; "
2276
                            "consider examining correspondence "
2277
                            "of your binlog index file "
2278
                            "to the actual binlog files",
2279
                            linfo.log_file_name);
2280
        error= 1;
2281
        goto err;
2282
      }
2283
    }
2284
    if (find_next_log(&linfo, 0))
2285
      break;
2286
  }
2287
2288
  /* Start logging with a new file */
2289
  close(LOG_CLOSE_INDEX);
2290
  if ((error= my_delete_allow_opened(index_file_name, MYF(0))))	// Reset (open will update)
2291
  {
2292
    if (my_errno == ENOENT) 
2293
    {
2294
      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2295
                          ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2296
                          index_file_name);
2297
      sql_print_information("Failed to delete file '%s'",
2298
                            index_file_name);
2299
      my_errno= 0;
2300
      error= 0;
2301
    }
2302
    else
2303
    {
2304
      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2305
                          ER_BINLOG_PURGE_FATAL_ERR,
2306
                          "a problem with deleting %s; "
2307
                          "consider examining correspondence "
2308
                          "of your binlog index file "
2309
                          "to the actual binlog files",
2310
                          index_file_name);
2311
      error= 1;
2312
      goto err;
2313
    }
2314
  }
2315
  if (!thd->slave_thread)
2316
    need_start_event=1;
2317
  if (!open_index_file(index_file_name, 0))
2318
    open(save_name, log_type, 0, io_cache_type, no_auto_events, max_size, 0);
2319
  my_free((uchar*) save_name, MYF(0));
2320
2321
err:
2322
  VOID(pthread_mutex_unlock(&LOCK_thread_count));
2323
  pthread_mutex_unlock(&LOCK_index);
2324
  pthread_mutex_unlock(&LOCK_log);
2325
  DBUG_RETURN(error);
2326
}
2327
2328
2329
/**
2330
  Delete relay log files prior to rli->group_relay_log_name
2331
  (i.e. all logs which are not involved in a non-finished group
2332
  (transaction)), remove them from the index file and start on next
2333
  relay log.
2334
2335
  IMPLEMENTATION
2336
  - Protects index file with LOCK_index
2337
  - Delete relevant relay log files
2338
  - Copy all file names after these ones to the front of the index file
2339
  - If the OS has truncate, truncate the file, else fill it with \n'
2340
  - Read the next file name from the index file and store in rli->linfo
2341
2342
  @param rli	       Relay log information
2343
  @param included     If false, all relay logs that are strictly before
2344
                      rli->group_relay_log_name are deleted ; if true, the
2345
                      latter is deleted too (i.e. all relay logs
2346
                      read by the SQL slave thread are deleted).
2347
2348
  @note
2349
    - This is only called from the slave-execute thread when it has read
2350
    all commands from a relay log and want to switch to a new relay log.
2351
    - When this happens, we can be in an active transaction as
2352
    a transaction can span over two relay logs
2353
    (although it is always written as a single block to the master's binary
2354
    log, hence cannot span over two master's binary logs).
2355
2356
  @retval
2357
    0			ok
2358
  @retval
2359
    LOG_INFO_EOF	        End of log-index-file found
2360
  @retval
2361
    LOG_INFO_SEEK	Could not allocate IO cache
2362
  @retval
2363
    LOG_INFO_IO		Got IO error while reading file
2364
*/
2365
2366
#ifdef HAVE_REPLICATION
2367
2368
int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
{
  int error;
  DBUG_ENTER("purge_first_log");

  DBUG_ASSERT(is_open());
  DBUG_ASSERT(rli->slave_running == 1);
  DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));

  pthread_mutex_lock(&LOCK_index);

  /* Purge under log_space_lock, since log_space_total is updated */
  pthread_mutex_lock(&rli->log_space_lock);
  rli->relay_log.purge_logs(rli->group_relay_log_name, included,
                            0, 0, &rli->log_space_total);
  // Tell the I/O thread to take the relay_log_space_limit into account
  rli->ignore_log_space_limit= 0;
  pthread_mutex_unlock(&rli->log_space_lock);

  /*
    Ok to broadcast after the critical region as there is no risk of
    the mutex being destroyed by this thread later - this helps save
    context switches
  */
  pthread_cond_broadcast(&rli->log_space_cond);

  /*
    Read the next log file name from the index file and pass it back to
    the caller.
    If included is true, we want the first relay log;
    otherwise we want the one after event_relay_log_name.
  */
  if (included)
    error= find_log_pos(&rli->linfo, NullS, 0);
  else
  {
    error= find_log_pos(&rli->linfo, rli->event_relay_log_name, 0);
    if (!error)
      error= find_next_log(&rli->linfo, 0);
  }

  if (error)
  {
    char offset_text[22];
    sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
                    error,
                    llstr(rli->linfo.index_file_offset,offset_text),
                    rli->group_relay_log_name,
                    included);
    goto err;
  }

  /*
    Reset rli's coordinates to the current log.
  */
  rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
  strmake(rli->event_relay_log_name,rli->linfo.log_file_name,
          sizeof(rli->event_relay_log_name)-1);

  /*
    If we removed the rli->group_relay_log_name file, we must update the
    rli->group* coordinates; otherwise do not touch them as the group's
    execution is not finished (e.g. COMMIT not executed)
  */
  if (included)
  {
    rli->group_relay_log_pos= BIN_LOG_HEADER_SIZE;
    strmake(rli->group_relay_log_name,rli->linfo.log_file_name,
            sizeof(rli->group_relay_log_name)-1);
    rli->notify_group_relay_log_name_update();
  }

  /* Store where we are in the new file for the execution thread */
  flush_relay_log_info(rli);

err:
  pthread_mutex_unlock(&LOCK_index);
  DBUG_RETURN(error);
}
2439
2440
/**
2441
  Update log index_file.
2442
*/
2443
2444
int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
2445
{
2446
  if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
2447
    return LOG_INFO_IO;
2448
2449
  // now update offsets in index file for running threads
2450
  if (need_update_threads)
2451
    adjust_linfo_offsets(log_info->index_file_start_offset);
2452
  return 0;
2453
}
2454
2455
/**
2456
  Remove all logs before the given log from disk and from the index file.
2457
2458
  @param to_log	      Delete all log file name before this file.
2459
  @param included            If true, to_log is deleted too.
2460
  @param need_mutex
2461
  @param need_update_threads If we want to update the log coordinates of
2462
                             all threads. False for relay logs, true otherwise.
2463
  @param freed_log_space     If not null, decrement this variable of
2464
                             the amount of log space freed
2465
2466
  @note
2467
    If any of the logs before the deleted one is in use,
2468
    only purge logs up to this one.
2469
2470
  @retval
2471
    0			ok
2472
  @retval
2473
    LOG_INFO_EOF		to_log not found
2474
    LOG_INFO_EMFILE             too many files opened
2475
    LOG_INFO_FATAL              if any other than ENOENT error from
15 by brian
Fix for stat, NETWARE removal
2476
                                stat() or my_delete()
1 by brian
clean slate
2477
*/
2478
2479
int MYSQL_BIN_LOG::purge_logs(const char *to_log, 
2480
                          bool included,
2481
                          bool need_mutex, 
2482
                          bool need_update_threads, 
2483
                          ulonglong *decrease_log_space)
2484
{
2485
  int error;
2486
  int ret = 0;
2487
  bool exit_loop= 0;
2488
  LOG_INFO log_info;
2489
  DBUG_ENTER("purge_logs");
2490
  DBUG_PRINT("info",("to_log= %s",to_log));
2491
2492
  if (need_mutex)
2493
    pthread_mutex_lock(&LOCK_index);
2494
  if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
2495
    goto err;
2496
2497
  /*
2498
    File name exists in index file; delete until we find this file
2499
    or a file that is used.
2500
  */
2501
  if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
2502
    goto err;
2503
  while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
2504
         !log_in_use(log_info.log_file_name))
2505
  {
15 by brian
Fix for stat, NETWARE removal
2506
    struct stat s;
2507
    if (stat(log_info.log_file_name, &s))
1 by brian
clean slate
2508
    {
15 by brian
Fix for stat, NETWARE removal
2509
      if (errno == ENOENT) 
1 by brian
clean slate
2510
      {
2511
        /*
2512
          It's not fatal if we can't stat a log file that does not exist;
2513
          If we could not stat, we won't delete.
2514
        */     
2515
        push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2516
                            ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2517
                            log_info.log_file_name);
15 by brian
Fix for stat, NETWARE removal
2518
        sql_print_information("Failed to execute stat on file '%s'",
1 by brian
clean slate
2519
			      log_info.log_file_name);
2520
        my_errno= 0;
2521
      }
2522
      else
2523
      {
2524
        /*
2525
          Other than ENOENT are fatal
2526
        */
2527
        push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2528
                            ER_BINLOG_PURGE_FATAL_ERR,
2529
                            "a problem with getting info on being purged %s; "
2530
                            "consider examining correspondence "
2531
                            "of your binlog index file "
2532
                            "to the actual binlog files",
2533
                            log_info.log_file_name);
2534
        error= LOG_INFO_FATAL;
2535
        goto err;
2536
      }
2537
    }
2538
    else
2539
    {
2540
      DBUG_PRINT("info",("purging %s",log_info.log_file_name));
2541
      if (!my_delete(log_info.log_file_name, MYF(0)))
2542
      {
2543
        if (decrease_log_space)
2544
          *decrease_log_space-= s.st_size;
2545
      }
2546
      else
2547
      {
2548
        if (my_errno == ENOENT) 
2549
        {
2550
          push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2551
                              ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2552
                              log_info.log_file_name);
2553
          sql_print_information("Failed to delete file '%s'",
2554
                                log_info.log_file_name);
2555
          my_errno= 0;
2556
        }
2557
        else
2558
        {
2559
          push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2560
                              ER_BINLOG_PURGE_FATAL_ERR,
2561
                              "a problem with deleting %s; "
2562
                              "consider examining correspondence "
2563
                              "of your binlog index file "
2564
                              "to the actual binlog files",
2565
                              log_info.log_file_name);
2566
          if (my_errno == EMFILE)
2567
          {
2568
            DBUG_PRINT("info",
2569
                       ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno));
2570
            error= LOG_INFO_EMFILE;
2571
          }
2572
          error= LOG_INFO_FATAL;
2573
          goto err;
2574
        }
2575
      }
2576
    }
2577
2578
    ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
2579
2580
    if (find_next_log(&log_info, 0) || exit_loop)
2581
      break;
2582
  }
2583
  
2584
  /*
2585
    If we get killed -9 here, the sysadmin would have to edit
2586
    the log index file after restart - otherwise, this should be safe
2587
  */
2588
  error= update_log_index(&log_info, need_update_threads);
2589
  if (error == 0) {
2590
    error = ret;
2591
  }
2592
2593
err:
2594
  if (need_mutex)
2595
    pthread_mutex_unlock(&LOCK_index);
2596
  DBUG_RETURN(error);
2597
}
2598
2599
/**
2600
  Remove all logs before the given file date from disk and from the
2601
  index file.
2602
2603
  @param thd		Thread pointer
2604
  @param before_date	Delete all log files before given date.
2605
2606
  @note
2607
    If any of the logs before the deleted one is in use,
2608
    only purge logs up to this one.
2609
2610
  @retval
2611
    0				ok
2612
  @retval
2613
    LOG_INFO_PURGE_NO_ROTATE	Binary file that can't be rotated
2614
    LOG_INFO_FATAL              if any other than ENOENT error from
15 by brian
Fix for stat, NETWARE removal
2615
                                stat() or my_delete()
1 by brian
clean slate
2616
*/
2617
2618
int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
2619
{
2620
  int error;
2621
  LOG_INFO log_info;
15 by brian
Fix for stat, NETWARE removal
2622
  struct stat stat_area;
1 by brian
clean slate
2623
2624
  DBUG_ENTER("purge_logs_before_date");
2625
2626
  pthread_mutex_lock(&LOCK_index);
2627
2628
  /*
2629
    Delete until we find curren file
2630
    or a file that is used or a file
2631
    that is older than purge_time.
2632
  */
2633
  if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
2634
    goto err;
2635
2636
  while (strcmp(log_file_name, log_info.log_file_name) &&
2637
	 !log_in_use(log_info.log_file_name))
2638
  {
15 by brian
Fix for stat, NETWARE removal
2639
    if (stat(log_info.log_file_name, &stat_area))
1 by brian
clean slate
2640
    {
15 by brian
Fix for stat, NETWARE removal
2641
      if (errno == ENOENT) 
1 by brian
clean slate
2642
      {
2643
        /*
2644
          It's not fatal if we can't stat a log file that does not exist.
2645
        */     
2646
        push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2647
                            ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2648
                            log_info.log_file_name);
15 by brian
Fix for stat, NETWARE removal
2649
	sql_print_information("Failed to execute stat on file '%s'",
1 by brian
clean slate
2650
			      log_info.log_file_name);
2651
        my_errno= 0;
2652
      }
2653
      else
2654
      {
2655
        /*
2656
          Other than ENOENT are fatal
2657
        */
2658
        push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2659
                            ER_BINLOG_PURGE_FATAL_ERR,
2660
                            "a problem with getting info on being purged %s; "
2661
                            "consider examining correspondence "
2662
                            "of your binlog index file "
2663
                            "to the actual binlog files",
2664
                            log_info.log_file_name);
2665
        error= LOG_INFO_FATAL;
2666
        goto err;
2667
      }
2668
    }
2669
    else
2670
    {
2671
      if (stat_area.st_mtime >= purge_time)
2672
        break;
2673
      if (my_delete(log_info.log_file_name, MYF(0)))
2674
      {
2675
        if (my_errno == ENOENT) 
2676
        {
2677
          /* It's not fatal even if we can't delete a log file */
2678
          push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2679
                              ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2680
                              log_info.log_file_name);
2681
          sql_print_information("Failed to delete file '%s'",
2682
                                log_info.log_file_name);
2683
          my_errno= 0;
2684
        }
2685
        else
2686
        {
2687
          push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2688
                              ER_BINLOG_PURGE_FATAL_ERR,
2689
                              "a problem with deleting %s; "
2690
                              "consider examining correspondence "
2691
                              "of your binlog index file "
2692
                              "to the actual binlog files",
2693
                              log_info.log_file_name);
2694
          error= LOG_INFO_FATAL;
2695
          goto err;
2696
        }
2697
      }
2698
      ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
2699
    }
2700
    if (find_next_log(&log_info, 0))
2701
      break;
2702
  }
2703
2704
  /*
2705
    If we get killed -9 here, the sysadmin would have to edit
2706
    the log index file after restart - otherwise, this should be safe
2707
  */
2708
  error= update_log_index(&log_info, 1);
2709
2710
err:
2711
  pthread_mutex_unlock(&LOCK_index);
2712
  DBUG_RETURN(error);
2713
}
2714
#endif /* HAVE_REPLICATION */
2715
2716
2717
/**
2718
  Create a new log file name.
2719
2720
  @param buf		buf of at least FN_REFLEN where new name is stored
2721
2722
  @note
2723
    If file name will be longer then FN_REFLEN it will be truncated
2724
*/
2725
2726
void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
2727
{
2728
  uint dir_len = dirname_length(log_file_name); 
2729
  if (dir_len >= FN_REFLEN)
2730
    dir_len=FN_REFLEN-1;
2731
  strnmov(buf, log_file_name, dir_len);
2732
  strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
2733
}
2734
2735
2736
/**
2737
  Check if we are writing/reading to the given log file.
2738
*/
2739
2740
bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
2741
{
2742
  return !strcmp(log_file_name, log_file_name_arg);
2743
}
2744
2745
2746
/*
  Wrappers around new_file_impl to avoid using an argument
  to control locking. Such an argument 1) is less readable, 2) breaks
  encapsulation, and 3) allows external access to the class without
  a lock (which is not possible with the private new_file_without_locking
  method).
*/
2753
2754
void MYSQL_BIN_LOG::new_file()
2755
{
2756
  new_file_impl(1);
2757
}
2758
2759
2760
void MYSQL_BIN_LOG::new_file_without_locking()
2761
{
2762
  new_file_impl(0);
2763
}
2764
2765
2766
/**
2767
  Start writing to a new log file or reopen the old file.
2768
2769
  @param need_lock		Set to 1 if caller has not locked LOCK_log
2770
2771
  @note
2772
    The new file name is stored last in the index file
2773
*/
2774
2775
void MYSQL_BIN_LOG::new_file_impl(bool need_lock)
2776
{
2777
  char new_name[FN_REFLEN], *new_name_ptr, *old_name;
2778
2779
  DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
2780
  if (!is_open())
2781
  {
2782
    DBUG_PRINT("info",("log is closed"));
2783
    DBUG_VOID_RETURN;
2784
  }
2785
2786
  if (need_lock)
2787
    pthread_mutex_lock(&LOCK_log);
2788
  pthread_mutex_lock(&LOCK_index);
2789
2790
  safe_mutex_assert_owner(&LOCK_log);
2791
  safe_mutex_assert_owner(&LOCK_index);
2792
2793
  /*
2794
    if binlog is used as tc log, be sure all xids are "unlogged",
2795
    so that on recover we only need to scan one - latest - binlog file
2796
    for prepared xids. As this is expected to be a rare event,
2797
    simple wait strategy is enough. We're locking LOCK_log to be sure no
2798
    new Xid_log_event's are added to the log (and prepared_xids is not
2799
    increased), and waiting on COND_prep_xids for late threads to
2800
    catch up.
2801
  */
2802
  if (prepared_xids)
2803
  {
2804
    tc_log_page_waits++;
2805
    pthread_mutex_lock(&LOCK_prep_xids);
2806
    while (prepared_xids) {
2807
      DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids));
2808
      pthread_cond_wait(&COND_prep_xids, &LOCK_prep_xids);
2809
    }
2810
    pthread_mutex_unlock(&LOCK_prep_xids);
2811
  }
2812
2813
  /* Reuse old name if not binlog and not update log */
2814
  new_name_ptr= name;
2815
2816
  /*
2817
    If user hasn't specified an extension, generate a new log name
2818
    We have to do this here and not in open as we want to store the
2819
    new file name in the current binary log file.
2820
  */
2821
  if (generate_new_name(new_name, name))
2822
    goto end;
2823
  new_name_ptr=new_name;
2824
2825
  if (log_type == LOG_BIN)
2826
  {
2827
    if (!no_auto_events)
2828
    {
2829
      /*
2830
        We log the whole file name for log file as the user may decide
2831
        to change base names at some point.
2832
      */
2833
      Rotate_log_event r(new_name+dirname_length(new_name),
2834
                         0, LOG_EVENT_OFFSET, 0);
2835
      r.write(&log_file);
2836
      bytes_written += r.data_written;
2837
    }
2838
    /*
2839
      Update needs to be signalled even if there is no rotate event
2840
      log rotation should give the waiting thread a signal to
2841
      discover EOF and move on to the next log.
2842
    */
2843
    signal_update();
2844
  }
2845
  old_name=name;
2846
  name=0;				// Don't free name
2847
  close(LOG_CLOSE_TO_BE_OPENED);
2848
2849
  /*
2850
     Note that at this point, log_state != LOG_CLOSED (important for is_open()).
2851
  */
2852
2853
  /*
2854
     new_file() is only used for rotation (in FLUSH LOGS or because size >
2855
     max_binlog_size or max_relay_log_size).
2856
     If this is a binary log, the Format_description_log_event at the beginning of
2857
     the new file should have created=0 (to distinguish with the
2858
     Format_description_log_event written at server startup, which should
2859
     trigger temp tables deletion on slaves.
2860
  */
2861
2862
  open(old_name, log_type, new_name_ptr,
2863
       io_cache_type, no_auto_events, max_size, 1);
2864
  my_free(old_name,MYF(0));
2865
2866
end:
2867
  if (need_lock)
2868
    pthread_mutex_unlock(&LOCK_log);
2869
  pthread_mutex_unlock(&LOCK_index);
2870
2871
  DBUG_VOID_RETURN;
2872
}
2873
2874
2875
/*
  Write one event to the log under LOCK_log and rotate to a new file
  when the append position exceeds max_size. signal_update() is called
  whether or not the write succeeded.

  Returns 0 on success, 1 if writing the event failed.
*/
bool MYSQL_BIN_LOG::append(Log_event* ev)
{
  bool error= 0;
  pthread_mutex_lock(&LOCK_log);
  DBUG_ENTER("MYSQL_BIN_LOG::append");

  DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
  /*
    Log_event::write() is smart enough to use my_b_write() or
    my_b_append() depending on the kind of cache we have.
  */
  if (ev->write(&log_file))
  {
    error= 1;
  }
  else
  {
    bytes_written+= ev->data_written;
    DBUG_PRINT("info",("max_size: %lu",max_size));
    if ((uint) my_b_append_tell(&log_file) > max_size)
      new_file_without_locking();
  }

  pthread_mutex_unlock(&LOCK_log);
  signal_update();				// Safe as we don't call close
  DBUG_RETURN(error);
}
2901
2902
2903
bool MYSQL_BIN_LOG::appendv(const char* buf, uint len,...)
2904
{
2905
  bool error= 0;
2906
  DBUG_ENTER("MYSQL_BIN_LOG::appendv");
2907
  va_list(args);
2908
  va_start(args,len);
2909
2910
  DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
2911
2912
  safe_mutex_assert_owner(&LOCK_log);
2913
  do
2914
  {
2915
    if (my_b_append(&log_file,(uchar*) buf,len))
2916
    {
2917
      error= 1;
2918
      goto err;
2919
    }
2920
    bytes_written += len;
2921
  } while ((buf=va_arg(args,const char*)) && (len=va_arg(args,uint)));
2922
  DBUG_PRINT("info",("max_size: %lu",max_size));
2923
  if ((uint) my_b_append_tell(&log_file) > max_size)
2924
    new_file_without_locking();
2925
2926
err:
2927
  if (!error)
2928
    signal_update();
2929
  DBUG_RETURN(error);
2930
}
2931
2932
2933
bool MYSQL_BIN_LOG::flush_and_sync()
2934
{
2935
  int err=0, fd=log_file.file;
2936
  safe_mutex_assert_owner(&LOCK_log);
2937
  if (flush_io_cache(&log_file))
2938
    return 1;
2939
  if (++sync_binlog_counter >= sync_binlog_period && sync_binlog_period)
2940
  {
2941
    sync_binlog_counter= 0;
2942
    err=my_sync(fd, MYF(MY_WME));
2943
  }
2944
  return err;
2945
}
2946
2947
/**
  Switch the thread into "union" mode.  While this mode is on,
  MYSQL_BIN_LOG::write(Log_event *) only records that an event was seen
  instead of writing it.

  @param thd             thread whose binlog_evt_union state is reset
  @param query_id_param  stored as first_query_id of the union
*/
void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
{
  /* Union mode is not expected to be entered twice */
  DBUG_ASSERT(!thd->binlog_evt_union.do_union);

  thd->binlog_evt_union.first_query_id= query_id_param;
  thd->binlog_evt_union.unioned_events_trans= FALSE;
  thd->binlog_evt_union.unioned_events= FALSE;
  thd->binlog_evt_union.do_union= TRUE;
}
2955
2956
/**
  Leave the "union" mode entered by start_union_events().

  @param thd  thread whose binlog_evt_union.do_union flag is cleared
*/
void MYSQL_BIN_LOG::stop_union_events(THD *thd)
{
  /* Must be paired with a preceding start_union_events() */
  DBUG_ASSERT(thd->binlog_evt_union.do_union);

  thd->binlog_evt_union.do_union= FALSE;
}
2961
2962
/**
  Tell whether a query id belongs to the thread's current union.

  @param thd             thread to inspect
  @param query_id_param  query id to test

  @return TRUE if union mode is on and query_id_param is not older than
          the first_query_id recorded when the union was started.
*/
bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
{
  if (!thd->binlog_evt_union.do_union)
    return FALSE;

  return query_id_param >= thd->binlog_evt_union.first_query_id;
}
2967
2968
2969
/*
2970
  These functions are placed in this file since they need access to
2971
  binlog_hton, which has internal linkage.
2972
*/
2973
2974
int THD::binlog_setup_trx_data()
2975
{
2976
  DBUG_ENTER("THD::binlog_setup_trx_data");
2977
  binlog_trx_data *trx_data=
2978
    (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2979
2980
  if (trx_data)
2981
    DBUG_RETURN(0);                             // Already set up
2982
2983
  trx_data= (binlog_trx_data*) my_malloc(sizeof(binlog_trx_data), MYF(MY_ZEROFILL));
2984
  if (!trx_data ||
2985
      open_cached_file(&trx_data->trans_log, mysql_tmpdir,
2986
                       LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
2987
  {
2988
    my_free((uchar*)trx_data, MYF(MY_ALLOW_ZERO_PTR));
2989
    DBUG_RETURN(1);                      // Didn't manage to set it up
2990
  }
2991
  thd_set_ha_data(this, binlog_hton, trx_data);
2992
2993
  trx_data= new (thd_get_ha_data(this, binlog_hton)) binlog_trx_data;
2994
2995
  DBUG_RETURN(0);
2996
}
2997
2998
/*
2999
  Function to start a statement and optionally a transaction for the
3000
  binary log.
3001
3002
  SYNOPSIS
3003
    binlog_start_trans_and_stmt()
3004
3005
  DESCRIPTION
3006
3007
    This function does three things:
3008
    - Start a transaction if not in autocommit mode or if a BEGIN
3009
      statement has been seen.
3010
3011
    - Start a statement transaction to allow us to truncate the binary
3012
      log.
3013
3014
    - Save the currrent binlog position so that we can roll back the
3015
      statement by truncating the transaction log.
3016
3017
      We only update the saved position if the old one was undefined,
3018
      the reason is that there are some cases (e.g., for CREATE-SELECT)
3019
      where the position is saved twice (e.g., both in
3020
      select_create::prepare() and THD::binlog_write_table_map()) , but
3021
      we should use the first. This means that calls to this function
3022
      can be used to start the statement before the first table map
3023
      event, to include some extra events.
3024
 */
3025
3026
void
3027
THD::binlog_start_trans_and_stmt()
3028
{
3029
  binlog_trx_data *trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3030
  DBUG_ENTER("binlog_start_trans_and_stmt");
3031
  DBUG_PRINT("enter", ("trx_data: 0x%lx  trx_data->before_stmt_pos: %lu",
3032
                       (long) trx_data,
3033
                       (trx_data ? (ulong) trx_data->before_stmt_pos :
3034
                        (ulong) 0)));
3035
3036
  if (trx_data == NULL ||
3037
      trx_data->before_stmt_pos == MY_OFF_T_UNDEF)
3038
  {
3039
    this->binlog_set_stmt_begin();
3040
    if (options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
3041
      trans_register_ha(this, TRUE, binlog_hton);
3042
    trans_register_ha(this, FALSE, binlog_hton);
3043
    /*
3044
      Mark statement transaction as read/write. We never start
3045
      a binary log transaction and keep it read-only,
3046
      therefore it's best to mark the transaction read/write just
3047
      at the same time we start it.
3048
      Not necessary to mark the normal transaction read/write
3049
      since the statement-level flag will be propagated automatically
3050
      inside ha_commit_trans.
3051
    */
3052
    ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
3053
  }
3054
  DBUG_VOID_RETURN;
3055
}
3056
3057
void THD::binlog_set_stmt_begin() {
3058
  binlog_trx_data *trx_data=
3059
    (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3060
3061
  /*
3062
    The call to binlog_trans_log_savepos() might create the trx_data
3063
    structure, if it didn't exist before, so we save the position
3064
    into an auto variable and then write it into the transaction
3065
    data for the binary log (i.e., trx_data).
3066
  */
3067
  my_off_t pos= 0;
3068
  binlog_trans_log_savepos(this, &pos);
3069
  trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3070
  trx_data->before_stmt_pos= pos;
3071
}
3072
3073
3074
/*
3075
  Write a table map to the binary log.
3076
 */
3077
3078
int THD::binlog_write_table_map(TABLE *table, bool is_trans)
3079
{
3080
  int error;
3081
  DBUG_ENTER("THD::binlog_write_table_map");
3082
  DBUG_PRINT("enter", ("table: 0x%lx  (%s: #%lu)",
3083
                       (long) table, table->s->table_name.str,
3084
                       table->s->table_map_id));
3085
3086
  /* Pre-conditions */
3087
  DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open());
3088
  DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
3089
3090
  Table_map_log_event::flag_set const
3091
    flags= Table_map_log_event::TM_NO_FLAGS;
3092
3093
  Table_map_log_event
3094
    the_event(this, table, table->s->table_map_id, is_trans, flags);
3095
3096
  if (is_trans && binlog_table_maps == 0)
3097
    binlog_start_trans_and_stmt();
3098
3099
  if ((error= mysql_bin_log.write(&the_event)))
3100
    DBUG_RETURN(error);
3101
3102
  binlog_table_maps++;
3103
  table->s->table_map_version= mysql_bin_log.table_map_version();
3104
  DBUG_RETURN(0);
3105
}
3106
3107
/**
  Return the Rows event currently pending for this thread.

  @return the pending event, or NULL when the thread has no binlog
          transaction data yet.
*/
Rows_log_event*
THD::binlog_get_pending_rows_event() const
{
  binlog_trx_data *const trx_data=
    (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);

  /*
    Not ideal, but: when there is no trx_data,
    prepare_pending_rows_event() has never been called (the trx_data is
    set up there), so there cannot be a pending event either.
   */
  if (!trx_data)
    return NULL;

  return trx_data->pending();
}
3120
3121
void
3122
THD::binlog_set_pending_rows_event(Rows_log_event* ev)
3123
{
3124
  if (thd_get_ha_data(this, binlog_hton) == NULL)
3125
    binlog_setup_trx_data();
3126
3127
  binlog_trx_data *const trx_data=
3128
    (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3129
3130
  DBUG_ASSERT(trx_data);
3131
  trx_data->set_pending(ev);
3132
}
3133
3134
3135
/*
3136
  Moves the last bunch of rows from the pending Rows event to the binlog
3137
  (either cached binlog if transaction, or disk binlog). Sets a new pending
3138
  event.
3139
*/
3140
int
3141
MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
3142
                                                Rows_log_event* event)
3143
{
3144
  DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
3145
  DBUG_ASSERT(mysql_bin_log.is_open());
3146
  DBUG_PRINT("enter", ("event: 0x%lx", (long) event));
3147
3148
  int error= 0;
3149
3150
  binlog_trx_data *const trx_data=
3151
    (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
3152
3153
  DBUG_ASSERT(trx_data);
3154
3155
  DBUG_PRINT("info", ("trx_data->pending(): 0x%lx", (long) trx_data->pending()));
3156
3157
  if (Rows_log_event* pending= trx_data->pending())
3158
  {
3159
    IO_CACHE *file= &log_file;
3160
3161
    /*
3162
      Decide if we should write to the log file directly or to the
3163
      transaction log.
3164
    */
3165
    if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log))
3166
      file= &trx_data->trans_log;
3167
3168
    /*
3169
      If we are writing to the log file directly, we could avoid
3170
      locking the log. This does not work since we need to step the
3171
      m_table_map_version below, and that change has to be protected
3172
      by the LOCK_log mutex.
3173
    */
3174
    pthread_mutex_lock(&LOCK_log);
3175
3176
    /*
3177
      Write pending event to log file or transaction cache
3178
    */
3179
    if (pending->write(file))
3180
    {
3181
      pthread_mutex_unlock(&LOCK_log);
3182
      DBUG_RETURN(1);
3183
    }
3184
3185
    /*
3186
      We step the table map version if we are writing an event
3187
      representing the end of a statement.  We do this regardless of
3188
      wheather we write to the transaction cache or to directly to the
3189
      file.
3190
3191
      In an ideal world, we could avoid stepping the table map version
3192
      if we were writing to a transaction cache, since we could then
3193
      reuse the table map that was written earlier in the transaction
3194
      cache.  This does not work since STMT_END_F implies closing all
3195
      table mappings on the slave side.
3196
3197
      TODO: Find a solution so that table maps does not have to be
3198
      written several times within a transaction.
3199
     */
3200
    if (pending->get_flags(Rows_log_event::STMT_END_F))
3201
      ++m_table_map_version;
3202
3203
    delete pending;
3204
3205
    if (file == &log_file)
3206
    {
3207
      error= flush_and_sync();
3208
      if (!error)
3209
      {
3210
        signal_update();
3211
        rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3212
      }
3213
    }
3214
3215
    pthread_mutex_unlock(&LOCK_log);
3216
  }
3217
3218
  thd->binlog_set_pending_rows_event(event);
3219
3220
  DBUG_RETURN(error);
3221
}
3222
3223
/**
3224
  Write an event to the binary log.
3225
*/
3226
3227
bool MYSQL_BIN_LOG::write(Log_event *event_info)
3228
{
3229
  THD *thd= event_info->thd;
3230
  bool error= 1;
3231
  DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)");
3232
3233
  if (thd->binlog_evt_union.do_union)
3234
  {
3235
    /*
3236
      In Stored function; Remember that function call caused an update.
3237
      We will log the function call to the binary log on function exit
3238
    */
3239
    thd->binlog_evt_union.unioned_events= TRUE;
3240
    thd->binlog_evt_union.unioned_events_trans |= event_info->cache_stmt;
3241
    DBUG_RETURN(0);
3242
  }
3243
3244
  /*
3245
    Flush the pending rows event to the transaction cache or to the
3246
    log file.  Since this function potentially aquire the LOCK_log
3247
    mutex, we do this before aquiring the LOCK_log mutex in this
3248
    function.
3249
3250
    We only end the statement if we are in a top-level statement.  If
3251
    we are inside a stored function, we do not end the statement since
3252
    this will close all tables on the slave.
3253
  */
3254
  bool const end_stmt= false;
3255
  thd->binlog_flush_pending_rows_event(end_stmt);
3256
3257
  pthread_mutex_lock(&LOCK_log);
3258
3259
  /*
3260
     In most cases this is only called if 'is_open()' is true; in fact this is
3261
     mostly called if is_open() *was* true a few instructions before, but it
3262
     could have changed since.
3263
  */
3264
  if (likely(is_open()))
3265
  {
3266
    IO_CACHE *file= &log_file;
3267
    /*
3268
      In the future we need to add to the following if tests like
3269
      "do the involved tables match (to be implemented)
3270
      binlog_[wild_]{do|ignore}_table?" (WL#1049)"
3271
    */
3272
    const char *local_db= event_info->get_db();
3273
    if ((thd && !(thd->options & OPTION_BIN_LOG)) ||
3274
	(!binlog_filter->db_ok(local_db)))
3275
    {
3276
      VOID(pthread_mutex_unlock(&LOCK_log));
3277
      DBUG_RETURN(0);
3278
    }
3279
3280
    /*
3281
      Should we write to the binlog cache or to the binlog on disk?
3282
      Write to the binlog cache if:
3283
      - it is already not empty (meaning we're in a transaction; note that the
3284
     present event could be about a non-transactional table, but still we need
3285
     to write to the binlog cache in that case to handle updates to mixed
3286
     trans/non-trans table types the best possible in binlogging)
3287
      - or if the event asks for it (cache_stmt == TRUE).
3288
    */
3289
    if (opt_using_transactions && thd)
3290
    {
3291
      if (thd->binlog_setup_trx_data())
3292
        goto err;
3293
3294
      binlog_trx_data *const trx_data=
3295
        (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
3296
      IO_CACHE *trans_log= &trx_data->trans_log;
3297
      my_off_t trans_log_pos= my_b_tell(trans_log);
3298
      if (event_info->get_cache_stmt() || trans_log_pos != 0)
3299
      {
3300
        DBUG_PRINT("info", ("Using trans_log: cache: %d, trans_log_pos: %lu",
3301
                            event_info->get_cache_stmt(),
3302
                            (ulong) trans_log_pos));
3303
        if (trans_log_pos == 0)
3304
          thd->binlog_start_trans_and_stmt();
3305
        file= trans_log;
3306
      }
3307
      /*
3308
        TODO as Mats suggested, for all the cases above where we write to
3309
        trans_log, it sounds unnecessary to lock LOCK_log. We should rather
3310
        test first if we want to write to trans_log, and if not, lock
3311
        LOCK_log.
3312
      */
3313
    }
3314
    DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));
3315
3316
    /*
3317
      No check for auto events flag here - this write method should
3318
      never be called if auto-events are enabled
3319
    */
3320
3321
    /*
3322
      1. Write first log events which describe the 'run environment'
3323
      of the SQL command
3324
    */
3325
3326
    /*
3327
      If row-based binlogging, Insert_id, Rand and other kind of "setting
3328
      context" events are not needed.
3329
    */
3330
    if (thd)
3331
    {
3332
      if (!thd->current_stmt_binlog_row_based)
3333
      {
3334
        if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
3335
        {
3336
          Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
3337
                             thd->first_successful_insert_id_in_prev_stmt_for_binlog);
3338
          if (e.write(file))
3339
            goto err;
3340
        }
3341
        if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
3342
        {
3343
          DBUG_PRINT("info",("number of auto_inc intervals: %u",
3344
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
3345
                             nb_elements()));
3346
          /*
3347
            If the auto_increment was second in a table's index (possible with
3348
            MyISAM or BDB) (table->next_number_keypart != 0), such event is
3349
            in fact not necessary. We could avoid logging it.
3350
          */
3351
          Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
3352
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
3353
                             minimum());
3354
          if (e.write(file))
3355
            goto err;
3356
        }
3357
        if (thd->rand_used)
3358
        {
3359
          Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
3360
          if (e.write(file))
3361
            goto err;
3362
        }
3363
        if (thd->user_var_events.elements)
3364
        {
3365
          for (uint i= 0; i < thd->user_var_events.elements; i++)
3366
          {
3367
            BINLOG_USER_VAR_EVENT *user_var_event;
3368
            get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);
3369
            User_var_log_event e(thd, user_var_event->user_var_event->name.str,
3370
                                 user_var_event->user_var_event->name.length,
3371
                                 user_var_event->value,
3372
                                 user_var_event->length,
3373
                                 user_var_event->type,
3374
                                 user_var_event->charset_number);
3375
            if (e.write(file))
3376
              goto err;
3377
          }
3378
        }
3379
      }
3380
    }
3381
3382
    /*
3383
       Write the SQL command
3384
     */
3385
3386
    if (event_info->write(file))
3387
      goto err;
3388
3389
    if (file == &log_file) // we are writing to the real log (disk)
3390
    {
3391
      if (flush_and_sync())
3392
	goto err;
3393
      signal_update();
3394
      rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3395
    }
3396
    error=0;
3397
3398
err:
3399
    if (error)
3400
    {
3401
      if (my_errno == EFBIG)
3402
	my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(0));
3403
      else
3404
	my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
3405
      write_error=1;
3406
    }
3407
  }
3408
3409
  if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
3410
    ++m_table_map_version;
3411
3412
  pthread_mutex_unlock(&LOCK_log);
3413
  DBUG_RETURN(error);
3414
}
3415
3416
3417
int error_log_print(enum loglevel level, const char *format,
3418
                    va_list args)
3419
{
3420
  return logger.error_log_print(level, format, args);
3421
}
3422
3423
3424
bool slow_log_print(THD *thd, const char *query, uint query_length,
3425
                    ulonglong current_utime)
3426
{
3427
  return logger.slow_log_print(thd, query, query_length, current_utime);
3428
}
3429
3430
3431
/**
  Decide whether a server command should be written to the general log.

  @param thd      thread issuing the command
  @param command  command about to be logged

  @return TRUE if at least one general log handler is enabled, the
          command's bit is set in what_to_log, and the thread does not
          have OPTION_LOG_OFF set; FALSE otherwise.
*/
bool LOGGER::log_command(THD *thd, enum enum_server_command command)
{
  /*
    Only log when there is at least one log event handler enabled and
    this kind of command is selected for logging.
  */
  if (!*general_log_handler_list || !(what_to_log & (1L << (uint) command)))
    return FALSE;

  /* Logging has been switched off for this thread */
  if (thd->options & OPTION_LOG_OFF)
    return FALSE;

  return TRUE;
}
3450
3451
3452
bool general_log_print(THD *thd, enum enum_server_command command,
3453
                       const char *format, ...)
3454
{
3455
  va_list args;
3456
  uint error= 0;
3457
3458
  /* Print the message to the buffer if we want to log this king of commands */
3459
  if (! logger.log_command(thd, command))
3460
    return FALSE;
3461
3462
  va_start(args, format);
3463
  error= logger.general_log_print(thd, command, format, args);
3464
  va_end(args);
3465
3466
  return error;
3467
}
3468
3469
bool general_log_write(THD *thd, enum enum_server_command command,
3470
                       const char *query, uint query_length)
3471
{
3472
  /* Write the message to the log if we want to log this king of commands */
3473
  if (logger.log_command(thd, command))
3474
    return logger.general_log_write(thd, command, query, query_length);
3475
3476
  return FALSE;
3477
}
3478
3479
/**
  Switch to a new log file when forced to or when the current one has
  reached max_size and, with replication compiled in, purge logs older
  than expire_logs_days afterwards.

  @param flags  bitmask:
                - RP_LOCK_LOG_IS_ALREADY_LOCKED : the caller holds
                  LOCK_log, do not take/release it here
                - RP_FORCE_ROTATE : rotate regardless of the file size
*/
void MYSQL_BIN_LOG::rotate_and_purge(uint flags)
{
  const bool lock_here= !(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED);

  if (lock_here)
    pthread_mutex_lock(&LOCK_log);

  /* The size is only looked at when rotation is not forced */
  const bool do_rotate= (flags & RP_FORCE_ROTATE) ||
                        (my_b_tell(&log_file) >= (my_off_t) max_size);
  if (do_rotate)
  {
    new_file_without_locking();
#ifdef HAVE_REPLICATION
    if (expire_logs_days)
    {
      time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
      if (purge_time >= 0)
        purge_logs_before_date(purge_time);
    }
#endif
  }

  if (lock_here)
    pthread_mutex_unlock(&LOCK_log);
}
3499
3500
/**
  Hand out the current file_id and advance the counter, under LOCK_log.

  @return the value file_id had before the increment
*/
uint MYSQL_BIN_LOG::next_file_id()
{
  pthread_mutex_lock(&LOCK_log);
  const uint id= file_id;
  file_id++;
  pthread_mutex_unlock(&LOCK_log);
  return id;
}
3508
3509
3510
/*
3511
  Write the contents of a cache to the binary log.
3512
3513
  SYNOPSIS
3514
    write_cache()
3515
    cache    Cache to write to the binary log
3516
    lock_log True if the LOCK_log mutex should be aquired, false otherwise
3517
    sync_log True if the log should be flushed and sync:ed
3518
3519
  DESCRIPTION
3520
    Write the contents of the cache to the binary log. The cache will
3521
    be reset as a READ_CACHE to be able to read the contents from it.
3522
 */
3523
3524
int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
3525
{
3526
  Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
3527
3528
  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
3529
    return ER_ERROR_ON_WRITE;
3530
  uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
3531
  long val;
3532
  uchar header[LOG_EVENT_HEADER_LEN];
3533
3534
  /*
3535
    The events in the buffer have incorrect end_log_pos data
3536
    (relative to beginning of group rather than absolute),
3537
    so we'll recalculate them in situ so the binlog is always
3538
    correct, even in the middle of a group. This is possible
3539
    because we now know the start position of the group (the
3540
    offset of this cache in the log, if you will); all we need
3541
    to do is to find all event-headers, and add the position of
3542
    the group to the end_log_pos of each event.  This is pretty
3543
    straight forward, except that we read the cache in segments,
3544
    so an event-header might end up on the cache-border and get
3545
    split.
3546
  */
3547
3548
  group= (uint)my_b_tell(&log_file);
3549
  hdr_offs= carry= 0;
3550
3551
  do
3552
  {
3553
3554
    /*
3555
      if we only got a partial header in the last iteration,
3556
      get the other half now and process a full header.
3557
    */
3558
    if (unlikely(carry > 0))
3559
    {
3560
      DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
3561
3562
      /* assemble both halves */
3563
      memcpy(&header[carry], (char *)cache->read_pos, LOG_EVENT_HEADER_LEN - carry);
3564
3565
      /* fix end_log_pos */
3566
      val= uint4korr(&header[LOG_POS_OFFSET]) + group;
3567
      int4store(&header[LOG_POS_OFFSET], val);
3568
3569
      /* write the first half of the split header */
3570
      if (my_b_write(&log_file, header, carry))
3571
        return ER_ERROR_ON_WRITE;
3572
3573
      /*
3574
        copy fixed second half of header to cache so the correct
3575
        version will be written later.
3576
      */
3577
      memcpy((char *)cache->read_pos, &header[carry], LOG_EVENT_HEADER_LEN - carry);
3578
3579
      /* next event header at ... */
3580
      hdr_offs = uint4korr(&header[EVENT_LEN_OFFSET]) - carry;
3581
3582
      carry= 0;
3583
    }
3584
3585
    /* if there is anything to write, process it. */
3586
3587
    if (likely(length > 0))
3588
    {
3589
      /*
3590
        process all event-headers in this (partial) cache.
3591
        if next header is beyond current read-buffer,
3592
        we'll get it later (though not necessarily in the
3593
        very next iteration, just "eventually").
3594
      */
3595
3596
      while (hdr_offs < length)
3597
      {
3598
        /*
3599
          partial header only? save what we can get, process once
3600
          we get the rest.
3601
        */
3602
3603
        if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
3604
        {
3605
          carry= length - hdr_offs;
3606
          memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
3607
          length= hdr_offs;
3608
        }
3609
        else
3610
        {
3611
          /* we've got a full event-header, and it came in one piece */
3612
3613
          uchar *log_pos= (uchar *)cache->read_pos + hdr_offs + LOG_POS_OFFSET;
3614
3615
          /* fix end_log_pos */
3616
          val= uint4korr(log_pos) + group;
3617
          int4store(log_pos, val);
3618
3619
          /* next event header at ... */
3620
          log_pos= (uchar *)cache->read_pos + hdr_offs + EVENT_LEN_OFFSET;
3621
          hdr_offs += uint4korr(log_pos);
3622
3623
        }
3624
      }
3625
3626
      /*
3627
        Adjust hdr_offs. Note that it may still point beyond the segment
3628
        read in the next iteration; if the current event is very long,
3629
        it may take a couple of read-iterations (and subsequent adjustments
3630
        of hdr_offs) for it to point into the then-current segment.
3631
        If we have a split header (!carry), hdr_offs will be set at the
3632
        beginning of the next iteration, overwriting the value we set here:
3633
      */
3634
      hdr_offs -= length;
3635
    }
3636
3637
    /* Write data to the binary log file */
3638
    if (my_b_write(&log_file, cache->read_pos, length))
3639
      return ER_ERROR_ON_WRITE;
3640
    cache->read_pos=cache->read_end;		// Mark buffer used up
3641
  } while ((length= my_b_fill(cache)));
3642
3643
  DBUG_ASSERT(carry == 0);
3644
3645
  if (sync_log)
3646
    flush_and_sync();
3647
3648
  return 0;                                     // All OK
3649
}
3650
3651
/**
3652
  Write a cached log entry to the binary log.
3653
  - To support transaction over replication, we wrap the transaction
3654
  with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
3655
  We want to write a BEGIN/ROLLBACK block when a non-transactional table
3656
  was updated in a transaction which was rolled back. This is to ensure
3657
  that the same updates are run on the slave.
3658
3659
  @param thd
3660
  @param cache		The cache to copy to the binlog
3661
  @param commit_event   The commit event to print after writing the
3662
                        contents of the cache.
3663
3664
  @note
3665
    We only come here if there is something in the cache.
3666
  @note
3667
    The thing in the cache is always a complete transaction.
3668
  @note
3669
    'cache' needs to be reinitialized after this functions returns.
3670
*/
3671
3672
bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
3673
{
3674
  DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)");
3675
  VOID(pthread_mutex_lock(&LOCK_log));
3676
3677
  /* NULL would represent nothing to replicate after ROLLBACK */
3678
  DBUG_ASSERT(commit_event != NULL);
3679
3680
  DBUG_ASSERT(is_open());
3681
  if (likely(is_open()))                       // Should always be true
3682
  {
3683
    /*
3684
      We only bother to write to the binary log if there is anything
3685
      to write.
3686
     */
3687
    if (my_b_tell(cache) > 0)
3688
    {
3689
      /*
3690
        Log "BEGIN" at the beginning of every transaction.  Here, a
3691
        transaction is either a BEGIN..COMMIT block or a single
3692
        statement in autocommit mode.
3693
      */
3694
      Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE);
3695
      /*
3696
        Imagine this is rollback due to net timeout, after all
3697
        statements of the transaction succeeded. Then we want a
3698
        zero-error code in BEGIN.  In other words, if there was a
3699
        really serious error code it's already in the statement's
3700
        events, there is no need to put it also in this internally
3701
        generated event, and as this event is generated late it would
3702
        lead to false alarms.
3703
3704
        This is safer than thd->clear_error() against kills at shutdown.
3705
      */
3706
      qinfo.error_code= 0;
3707
      /*
3708
        Now this Query_log_event has artificial log_pos 0. It must be
3709
        adjusted to reflect the real position in the log. Not doing it
3710
        would confuse the slave: it would prevent this one from
3711
        knowing where he is in the master's binlog, which would result
3712
        in wrong positions being shown to the user, MASTER_POS_WAIT
3713
        undue waiting etc.
3714
      */
3715
      if (qinfo.write(&log_file))
3716
        goto err;
3717
3718
      DBUG_EXECUTE_IF("crash_before_writing_xid",
3719
                      {
3720
                        if ((write_error= write_cache(cache, false, true)))
3721
                          DBUG_PRINT("info", ("error writing binlog cache: %d",
3722
                                               write_error));
3723
                        DBUG_PRINT("info", ("crashing before writing xid"));
3724
                        abort();
3725
                      });
3726
3727
      if ((write_error= write_cache(cache, false, false)))
3728
        goto err;
3729
3730
      if (commit_event && commit_event->write(&log_file))
3731
        goto err;
3732
      if (flush_and_sync())
3733
        goto err;
3734
      DBUG_EXECUTE_IF("half_binlogged_transaction", abort(););
3735
      if (cache->error)				// Error on read
3736
      {
3737
        sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
3738
        write_error=1;				// Don't give more errors
3739
        goto err;
3740
      }
3741
      signal_update();
3742
    }
3743
3744
    /*
3745
      if commit_event is Xid_log_event, increase the number of
3746
      prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated
3747
      if there're prepared xids in it - see the comment in new_file() for
3748
      an explanation.
3749
      If the commit_event is not Xid_log_event (then it's a Query_log_event)
3750
      rotate binlog, if necessary.
3751
    */
3752
    if (commit_event && commit_event->get_type_code() == XID_EVENT)
3753
    {
3754
      pthread_mutex_lock(&LOCK_prep_xids);
3755
      prepared_xids++;
3756
      pthread_mutex_unlock(&LOCK_prep_xids);
3757
    }
3758
    else
3759
      rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3760
  }
3761
  VOID(pthread_mutex_unlock(&LOCK_log));
3762
3763
  DBUG_RETURN(0);
3764
3765
err:
3766
  if (!write_error)
3767
  {
3768
    write_error= 1;
3769
    sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
3770
  }
3771
  VOID(pthread_mutex_unlock(&LOCK_log));
3772
  DBUG_RETURN(1);
3773
}
3774
3775
3776
/**
3777
  Wait until we get a signal that the relay log has been updated
3778
3779
  @param[in] thd   a THD struct
3780
  @note
3781
    LOCK_log must be taken before calling this function.
3782
    It will be released at the end of the function.
3783
*/
3784
3785
void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
3786
{
3787
  const char *old_msg;
3788
  DBUG_ENTER("wait_for_update_relay_log");
3789
  old_msg= thd->enter_cond(&update_cond, &LOCK_log,
3790
                           "Slave has read all relay log; " 
3791
                           "waiting for the slave I/O "
3792
                           "thread to update it" );
3793
  pthread_cond_wait(&update_cond, &LOCK_log);
3794
  thd->exit_cond(old_msg);
3795
  DBUG_VOID_RETURN;
3796
}
3797
3798
3799
/**
3800
  Wait until we get a signal that the binary log has been updated.
3801
  Applies to master only.
3802
     
3803
  NOTES
3804
  @param[in] thd        a THD struct
3805
  @param[in] timeout    a pointer to a timespec;
3806
                        NULL means to wait w/o timeout.
3807
  @retval    0          if got signalled on update
3808
  @retval    non-0      if wait timeout elapsed
3809
  @note
3810
    LOCK_log must be taken before calling this function.
3811
    LOCK_log is being released while the thread is waiting.
3812
    LOCK_log is released by the caller.
3813
*/
3814
3815
int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
3816
                                           const struct timespec *timeout)
3817
{
3818
  int ret= 0;
3819
  const char* old_msg = thd->proc_info;
3820
  DBUG_ENTER("wait_for_update_bin_log");
3821
  old_msg= thd->enter_cond(&update_cond, &LOCK_log,
3822
                           "Master has sent all binlog to slave; "
3823
                           "waiting for binlog to be updated");
3824
  if (!timeout)
3825
    pthread_cond_wait(&update_cond, &LOCK_log);
3826
  else
3827
    ret= pthread_cond_timedwait(&update_cond, &LOCK_log,
3828
                                const_cast<struct timespec *>(timeout));
3829
  DBUG_RETURN(ret);
3830
}
3831
3832
3833
/**
3834
  Close the log file.
3835
3836
  @param exiting     Bitmask for one or more of the following bits:
3837
          - LOG_CLOSE_INDEX : if we should close the index file
3838
          - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
3839
                                     at once after close.
3840
          - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
3841
3842
  @note
3843
    One can do an open on the object at once after doing a close.
3844
    The internal structures are not freed until cleanup() is called
3845
*/
3846
3847
void MYSQL_BIN_LOG::close(uint exiting)
{					// One can't set log_type here!
  DBUG_ENTER("MYSQL_BIN_LOG::close");
  DBUG_PRINT("enter",("exiting: %d", (int) exiting));

  if (log_state == LOG_OPENED)
  {
#ifdef HAVE_REPLICATION
    /* Optionally terminate a binary log with a Stop event. */
    if ((exiting & LOG_CLOSE_STOP_EVENT) &&
        log_type == LOG_BIN && !no_auto_events)
    {
      Stop_log_event stop_ev;
      stop_ev.write(&log_file);
      bytes_written+= stop_ev.data_written;
      signal_update();
    }
#endif /* HAVE_REPLICATION */

    /*
      Clear the flags byte in the log header
      (i.e. reset LOG_EVENT_BINLOG_IN_USE_F).
      don't pwrite in a file opened with O_APPEND - it doesn't work
    */
    if (log_type == LOG_BIN && log_file.type == WRITE_CACHE)
    {
      const my_off_t flags_pos= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
      uchar cleared_flags= 0;
      pwrite(log_file.file, &cleared_flags, 1, flags_pos);
    }

    /* this will cleanup IO_CACHE, sync and close the file */
    MYSQL_LOG::close(exiting);
  }

  /*
    The index file is handled independently of the log state: an earlier
    partial close may have left it open.
  */
  if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
  {
    end_io_cache(&index_file);
    const bool close_failed= my_close(index_file.file, MYF(0)) < 0;
    if (close_failed && !write_error)
    {
      /* Report only the first write error. */
      write_error= 1;
      sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name, errno);
    }
  }

  if (exiting & LOG_CLOSE_TO_BE_OPENED)
    log_state= LOG_TO_BE_OPENED;
  else
    log_state= LOG_CLOSED;

  safeFree(name);
  DBUG_VOID_RETURN;
}
3894
3895
3896
void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
{
  DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");

  /*
    LOCK_log is required here. Without it new_file() could call
    open(old_max_size), this function could then store max_size_arg, and
    open() would finally overwrite it with the stale old_max_size - as if
    the SET command had never been executed.
  */
  pthread_mutex_lock(&LOCK_log);
  if (is_open())
  {
    /* The new limit is only recorded while the log is open. */
    max_size= max_size_arg;
  }
  pthread_mutex_unlock(&LOCK_log);

  DBUG_VOID_RETURN;
}
3912
3913
3914
/**
3915
  Check if a string is a valid number.
3916
3917
  @param str			String to test
3918
  @param res			Store value here
3919
  @param allow_wildcards	Set to 1 if we should ignore '%' and '_'
3920
3921
  @note
3922
    For the moment the allow_wildcards argument is not used
3923
    Should be moved to some other file.
3924
3925
  @retval
3926
    1	String is a number
3927
  @retval
3928
    0	Error
3929
*/
3930
3931
static bool test_if_number(register const char *str,
3932
			   long *res, bool allow_wildcards)
3933
{
3934
  register int flag;
3935
  const char *start;
3936
  DBUG_ENTER("test_if_number");
3937
3938
  flag= 0; 
3939
  start= str;
3940
  while (*str++ == ' ') ;
3941
  if (*--str == '-' || *str == '+')
3942
    str++;
3943
  while (my_isdigit(files_charset_info,*str) ||
3944
	 (allow_wildcards && (*str == wild_many || *str == wild_one)))
3945
  {
3946
    flag=1;
3947
    str++;
3948
  }
3949
  if (*str == '.')
3950
  {
3951
    for (str++ ;
3952
	 my_isdigit(files_charset_info,*str) ||
3953
	   (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
3954
	 str++, flag=1) ;
3955
  }
3956
  if (*str != 0 || flag == 0)
3957
    DBUG_RETURN(0);
3958
  if (res)
3959
    *res=atol(start);
3960
  DBUG_RETURN(1);			/* Number ok */
3961
} /* test_if_number */
3962
3963
3964
/*
  Report 'message' together with the description of the current errno.
  With HAVE_STRERROR the text goes through sql_print_error(); otherwise
  perror() is used.
*/
void sql_perror(const char *message)
{
#ifdef HAVE_STRERROR
  const char *reason= strerror(errno);
  sql_print_error("%s: %s",message, reason);
#else
  perror(message);
#endif
}
3972
3973
3974
bool flush_error_log()
3975
{
3976
  bool result=0;
3977
  if (opt_error_log)
3978
  {
3979
    char err_renamed[FN_REFLEN], *end;
3980
    end= strmake(err_renamed,log_error_file,FN_REFLEN-4);
3981
    strmov(end, "-old");
3982
    VOID(pthread_mutex_lock(&LOCK_error_log));
3983
    char err_temp[FN_REFLEN+4];
3984
    /*
3985
     On Windows is necessary a temporary file for to rename
3986
     the current error file.
3987
    */
3988
    strxmov(err_temp, err_renamed,"-tmp",NullS);
3989
    (void) my_delete(err_temp, MYF(0)); 
3990
    if (freopen(err_temp,"a+",stdout))
3991
    {
3992
      int fd;
3993
      size_t bytes;
3994
      uchar buf[IO_SIZE];
3995
3996
      freopen(err_temp,"a+",stderr);
3997
      (void) my_delete(err_renamed, MYF(0));
3998
      my_rename(log_error_file,err_renamed,MYF(0));
3999
      if (freopen(log_error_file,"a+",stdout))
4000
        freopen(log_error_file,"a+",stderr);
4001
4002
      if ((fd = my_open(err_temp, O_RDONLY, MYF(0))) >= 0)
4003
      {
4004
        while ((bytes= my_read(fd, buf, IO_SIZE, MYF(0))) &&
4005
               bytes != MY_FILE_ERROR)
4006
          my_fwrite(stderr, buf, bytes, MYF(0));
4007
        my_close(fd, MYF(0));
4008
      }
4009
      (void) my_delete(err_temp, MYF(0)); 
4010
    }
4011
    else
4012
     result= 1;
4013
    VOID(pthread_mutex_unlock(&LOCK_error_log));
4014
  }
4015
   return result;
4016
}
4017
4018
void MYSQL_BIN_LOG::signal_update()
4019
{
4020
  DBUG_ENTER("MYSQL_BIN_LOG::signal_update");
4021
  pthread_cond_broadcast(&update_cond);
4022
  DBUG_VOID_RETURN;
4023
}
4024
4025
/**
4026
  Prints a printf style message to the error log and, under NT, to the
4027
  Windows event log.
4028
4029
  This function prints the message into a buffer and then sends that buffer
4030
  to other functions to write that message to other logging sources.
4031
4032
  @param event_type          Type of event to write (Error, Warning, or Info)
4033
  @param format              Printf style format of message
4034
  @param args                va_list list of arguments for the message
4035
4036
  @returns
4037
    The function always returns 0. The return value is present in the
4038
    signature to be compatible with other logging routines, which could
4039
    return an error (e.g. logging to the log tables)
4040
*/
77.1.45 by Monty Taylor
Warning fixes.
4041
static void print_buffer_to_file(enum loglevel level,
4042
                                 int error_code __attribute__((__unused__)),
4043
                                 const char *buffer,
4044
                                 size_t buffer_length __attribute__((__unused__)))
1 by brian
clean slate
4045
{
4046
  time_t skr;
4047
  struct tm tm_tmp;
4048
  struct tm *start;
4049
  DBUG_ENTER("print_buffer_to_file");
4050
  DBUG_PRINT("enter",("buffer: %s", buffer));
4051
4052
  VOID(pthread_mutex_lock(&LOCK_error_log));
4053
4054
  skr= my_time(0);
4055
  localtime_r(&skr, &tm_tmp);
4056
  start=&tm_tmp;
4057
4058
  fprintf(stderr, "%02d%02d%02d %2d:%02d:%02d [%s] %s\n",
4059
          start->tm_year % 100,
4060
          start->tm_mon+1,
4061
          start->tm_mday,
4062
          start->tm_hour,
4063
          start->tm_min,
4064
          start->tm_sec,
4065
          (level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
4066
           "Warning" : "Note"),
4067
          buffer);
4068
4069
  fflush(stderr);
4070
4071
  VOID(pthread_mutex_unlock(&LOCK_error_log));
4072
  DBUG_VOID_RETURN;
4073
}
4074
4075
4076
int vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
4077
{
4078
  char   buff[1024];
4079
  size_t length;
4080
  int error_code= errno;
4081
  DBUG_ENTER("vprint_msg_to_log");
4082
77.1.18 by Monty Taylor
Removed my_vsnprintf and my_snprintf.
4083
  length= vsnprintf(buff, sizeof(buff), format, args);
1 by brian
clean slate
4084
4085
  print_buffer_to_file(level, error_code, buff, length);
4086
4087
  DBUG_RETURN(0);
4088
}
4089
4090
4091
void sql_print_error(const char *format, ...) 
4092
{
4093
  va_list args;
4094
  DBUG_ENTER("sql_print_error");
4095
4096
  va_start(args, format);
4097
  error_log_print(ERROR_LEVEL, format, args);
4098
  va_end(args);
4099
4100
  DBUG_VOID_RETURN;
4101
}
4102
4103
4104
void sql_print_warning(const char *format, ...) 
4105
{
4106
  va_list args;
4107
  DBUG_ENTER("sql_print_warning");
4108
4109
  va_start(args, format);
4110
  error_log_print(WARNING_LEVEL, format, args);
4111
  va_end(args);
4112
4113
  DBUG_VOID_RETURN;
4114
}
4115
4116
4117
void sql_print_information(const char *format, ...) 
4118
{
4119
  va_list args;
4120
  DBUG_ENTER("sql_print_information");
4121
4122
  va_start(args, format);
4123
  error_log_print(INFORMATION_LEVEL, format, args);
4124
  va_end(args);
4125
4126
  DBUG_VOID_RETURN;
4127
}
4128
4129
4130
/********* transaction coordinator log for 2pc - mmap() based solution *******/
4131
4132
/*
4133
  the log consists of a file, mmapped to a memory.
4134
  file is divided into pages of tc_log_page_size size.
4135
  (usable size of the first page is smaller because of log header)
4136
  there's PAGE control structure for each page
4137
  each page (or rather PAGE control structure) can be in one of three
4138
  states - active, syncing, pool.
4139
  there could be only one page in active or syncing states,
4140
  but many in pool - pool is fifo queue.
4141
  usual lifecycle of a page is pool->active->syncing->pool
4142
  "active" page - is a page where new xid's are logged.
4143
  the page stays active as long as syncing slot is taken.
4144
  "syncing" page is being synced to disk. no new xid can be added to it.
4145
  when the sync is done the page is moved to a pool and an active page
4146
  becomes "syncing".
4147
4148
  the result of such an architecture is a natural "commit grouping" -
4149
  If commits are coming faster than the system can sync, they do not
4150
  stall. Instead, all commit that came since the last sync are
4151
  logged to the same page, and they all are synced with the next -
4152
  one - sync. Thus, though individual commits are delayed, throughput
4153
  is not decreasing.
4154
4155
  when a xid is added to an active page, the thread of this xid waits
4156
  for a page's condition until the page is synced. when syncing slot
4157
  becomes vacant one of these waiters is awakened to take care of syncing.
4158
  it syncs the page and signals all waiters that the page is synced.
4159
  PAGE::waiters is used to count these waiters, and a page may never
4160
  become active again until waiters==0 (that is all waiters from the
4161
  previous sync have noticed the sync was completed)
4162
4163
  note, that the page becomes "dirty" and has to be synced only when a
4164
  new xid is added into it. Removing a xid from a page does not make it
4165
  dirty - we don't sync removals to disk.
4166
*/
4167
4168
ulong tc_log_page_waits= 0;
4169
4170
#ifdef HAVE_MMAP
4171
4172
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)
4173
4174
static const char tc_log_magic[]={(char) 254, 0x23, 0x05, 0x74};
4175
4176
ulong opt_tc_log_size= TC_LOG_MIN_SIZE;
4177
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
4178
4179
int TC_LOG_MMAP::open(const char *opt_name)
4180
{
4181
  uint i;
4182
  bool crashed=FALSE;
4183
  PAGE *pg;
4184
4185
  DBUG_ASSERT(total_ha_2pc > 1);
4186
  DBUG_ASSERT(opt_name && opt_name[0]);
4187
4188
  tc_log_page_size= my_getpagesize();
4189
  DBUG_ASSERT(TC_LOG_PAGE_SIZE % tc_log_page_size == 0);
4190
4191
  fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
4192
  if ((fd= my_open(logname, O_RDWR, MYF(0))) < 0)
4193
  {
4194
    if (my_errno != ENOENT)
4195
      goto err;
4196
    if (using_heuristic_recover())
4197
      return 1;
4198
    if ((fd= my_create(logname, CREATE_MODE, O_RDWR, MYF(MY_WME))) < 0)
4199
      goto err;
4200
    inited=1;
4201
    file_length= opt_tc_log_size;
30 by Brian Aker
Large file and ftruncate() support
4202
    if (ftruncate(fd, file_length))
1 by brian
clean slate
4203
      goto err;
4204
  }
4205
  else
4206
  {
4207
    inited= 1;
4208
    crashed= TRUE;
4209
    sql_print_information("Recovering after a crash using %s", opt_name);
4210
    if (tc_heuristic_recover)
4211
    {
4212
      sql_print_error("Cannot perform automatic crash recovery when "
4213
                      "--tc-heuristic-recover is used");
4214
      goto err;
4215
    }
4216
    file_length= my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
4217
    if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
4218
      goto err;
4219
  }
4220
4221
  data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
4222
                        MAP_NOSYNC|MAP_SHARED, fd, 0);
4223
  if (data == MAP_FAILED)
4224
  {
4225
    my_errno=errno;
4226
    goto err;
4227
  }
4228
  inited=2;
4229
4230
  npages=(uint)file_length/tc_log_page_size;
4231
  DBUG_ASSERT(npages >= 3);             // to guarantee non-empty pool
4232
  if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
4233
    goto err;
4234
  inited=3;
4235
  for (pg=pages, i=0; i < npages; i++, pg++)
4236
  {
4237
    pg->next=pg+1;
4238
    pg->waiters=0;
4239
    pg->state=POOL;
4240
    pthread_mutex_init(&pg->lock, MY_MUTEX_INIT_FAST);
4241
    pthread_cond_init (&pg->cond, 0);
4242
    pg->start=(my_xid *)(data + i*tc_log_page_size);
4243
    pg->ptr=pg->start;
4244
    pg->end=(my_xid *)(pg->start + tc_log_page_size);
4245
    pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
4246
  }
4247
  pages[0].size=pages[0].free=
4248
                (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
4249
  pages[0].start=pages[0].end-pages[0].size;
4250
  pages[npages-1].next=0;
4251
  inited=4;
4252
4253
  if (crashed && recover())
4254
      goto err;
4255
4256
  memcpy(data, tc_log_magic, sizeof(tc_log_magic));
4257
  data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
15 by brian
Fix for stat, NETWARE removal
4258
  msync(data, tc_log_page_size, MS_SYNC);
4259
  my_sync(fd, MYF(0));
1 by brian
clean slate
4260
  inited=5;
4261
4262
  pthread_mutex_init(&LOCK_sync,    MY_MUTEX_INIT_FAST);
4263
  pthread_mutex_init(&LOCK_active,  MY_MUTEX_INIT_FAST);
4264
  pthread_mutex_init(&LOCK_pool,    MY_MUTEX_INIT_FAST);
4265
  pthread_cond_init(&COND_active, 0);
4266
  pthread_cond_init(&COND_pool, 0);
4267
4268
  inited=6;
4269
4270
  syncing= 0;
4271
  active=pages;
4272
  pool=pages+1;
4273
  pool_last=pages+npages-1;
4274
4275
  return 0;
4276
4277
err:
4278
  close();
4279
  return 1;
4280
}
4281
4282
/**
4283
  there is no active page, let's get one from the pool.
4284
4285
  Two strategies here:
4286
    -# take the first from the pool
4287
    -# if there're waiters - take the one with the most free space.
4288
4289
  @todo
4290
    TODO page merging. try to allocate adjacent page first,
4291
    so that they can be flushed both in one sync
4292
*/
4293
4294
void TC_LOG_MMAP::get_active_from_pool()
4295
{
4296
  PAGE **p, **best_p=0;
4297
  int best_free;
4298
4299
  if (syncing)
4300
    pthread_mutex_lock(&LOCK_pool);
4301
4302
  do
4303
  {
4304
    best_p= p= &pool;
4305
    if ((*p)->waiters == 0) // can the first page be used ?
4306
      break;                // yes - take it.
4307
4308
    best_free=0;            // no - trying second strategy
4309
    for (p=&(*p)->next; *p; p=&(*p)->next)
4310
    {
4311
      if ((*p)->waiters == 0 && (*p)->free > best_free)
4312
      {
4313
        best_free=(*p)->free;
4314
        best_p=p;
4315
      }
4316
    }
4317
  }
4318
  while ((*best_p == 0 || best_free == 0) && overflow());
4319
4320
  active=*best_p;
4321
  if (active->free == active->size) // we've chosen an empty page
4322
  {
4323
    tc_log_cur_pages_used++;
4324
    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
4325
  }
4326
4327
  if ((*best_p)->next)              // unlink the page from the pool
4328
    *best_p=(*best_p)->next;
4329
  else
4330
    pool_last=*best_p;
4331
4332
  if (syncing)
4333
    pthread_mutex_unlock(&LOCK_pool);
4334
}
4335
4336
/**
4337
  @todo
4338
  perhaps, increase log size ?
4339
*/
4340
int TC_LOG_MMAP::overflow()
4341
{
4342
  /*
4343
    simple overflow handling - just wait
4344
    TODO perhaps, increase log size ?
4345
    let's check the behaviour of tc_log_page_waits first
4346
  */
4347
  tc_log_page_waits++;
4348
  pthread_cond_wait(&COND_pool, &LOCK_pool);
4349
  return 1; // always return 1
4350
}
4351
4352
/**
4353
  Record that transaction XID is committed on the persistent storage.
4354
4355
    This function is called in the middle of two-phase commit:
4356
    First all resources prepare the transaction, then tc_log->log() is called,
4357
    then all resources commit the transaction, then tc_log->unlog() is called.
4358
4359
    All access to active page is serialized but it's not a problem, as
4360
    we're assuming that fsync() will be a main bottleneck.
4361
    That is, parallelizing writes to log pages we'll decrease number of
4362
    threads waiting for a page, but then all these threads will be waiting
4363
    for a fsync() anyway
4364
4365
   If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and
4366
   records XID in a special Xid_log_event.
4367
   If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped
4368
   log.
4369
4370
  @retval
4371
    0  - error
4372
  @retval
4373
    \# - otherwise, "cookie", a number that will be passed as an argument
4374
    to unlog() call. tc_log can define it any way it wants,
4375
    and use for whatever purposes. TC_LOG_MMAP sets it
4376
    to the position in memory where xid was logged to.
4377
*/
4378
77.1.45 by Monty Taylor
Warning fixes.
4379
int TC_LOG_MMAP::log_xid(THD *thd __attribute__((__unused__)), my_xid xid)
{
  int failed;
  PAGE *page;
  ulong cookie;

  pthread_mutex_lock(&LOCK_active);

  /*
    Wait while the active page has no free slot.
    active->free is read here without the page lock; that is acceptable
    because at worst an unlog() on the active page is missed, and this
    loop is not waiting for unlog() - unlog() does not signal COND_active.
  */
  while (unlikely(active && active->free == 0))
    pthread_cond_wait(&COND_active, &LOCK_active);

  /* No active page at all: fetch one from the pool. */
  if (active == 0)
    get_active_from_pool();

  page= active;
  pthread_mutex_lock(&page->lock);

  /* Advance to the first empty (zero) slot. */
  while (*page->ptr)
  {
    page->ptr++;
    DBUG_ASSERT(page->ptr < page->end);         // because page->free > 0
  }

  /*
    Store the xid and mark the page dirty. The cookie is the byte offset
    of the slot inside the mapping.
  */
  cookie= (ulong)((uchar *)page->ptr - data);   // can never be zero
  *page->ptr++= xid;
  page->free--;
  page->state= DIRTY;

  /* Switch from LOCK_active to LOCK_sync before releasing the page lock. */
  pthread_mutex_unlock(&LOCK_active);
  pthread_mutex_lock(&LOCK_sync);
  pthread_mutex_unlock(&page->lock);

  if (syncing)
  {
    /* Another thread is syncing: wait for our page to be handled. */
    page->waiters++;
    /*
      This has to be a while () loop, not do ... while (): the page state
      may already differ from DIRTY when we get here.
    */
    while (page->state == DIRTY && syncing)
      pthread_cond_wait(&page->cond, &LOCK_sync);
    page->waiters--;
    failed= page->state == ERROR;
    if (page->state != DIRTY)
    {
      /* The page was synced by somebody else - nothing left to do. */
      if (page->waiters == 0)
        pthread_cond_signal(&COND_pool);     // in case somebody's waiting
      pthread_mutex_unlock(&LOCK_sync);
      return failed ? 0 : cookie;
    }
  }

  /* The page is still dirty and the syncing slot is free: sync it here. */
  DBUG_ASSERT(active == page && syncing == 0);
  pthread_mutex_lock(&LOCK_active);
  syncing= page;                             // take the vacant slot
  active= 0;                                 // page is not active anymore
  pthread_cond_broadcast(&COND_active);      // in case somebody's waiting
  pthread_mutex_unlock(&LOCK_active);
  pthread_mutex_unlock(&LOCK_sync);
  failed= sync();

  return failed ? 0 : cookie;
}
4453
4454
int TC_LOG_MMAP::sync()
4455
{
4456
  int err;
4457
4458
  DBUG_ASSERT(syncing != active);
4459
4460
  /*
4461
    sit down and relax - this can take a while...
4462
    note - no locks are held at this point
4463
  */
15 by brian
Fix for stat, NETWARE removal
4464
  err= msync(syncing->start, 1, MS_SYNC);
4465
  if(err==0)
4466
    err= my_sync(fd, MYF(0));
1 by brian
clean slate
4467
4468
  /* page is synced. let's move it to the pool */
4469
  pthread_mutex_lock(&LOCK_pool);
4470
  pool_last->next=syncing;
4471
  pool_last=syncing;
4472
  syncing->next=0;
4473
  syncing->state= err ? ERROR : POOL;
4474
  pthread_cond_broadcast(&syncing->cond);    // signal "sync done"
4475
  pthread_cond_signal(&COND_pool);           // in case somebody's waiting
4476
  pthread_mutex_unlock(&LOCK_pool);
4477
4478
  /* marking 'syncing' slot free */
4479
  pthread_mutex_lock(&LOCK_sync);
4480
  syncing=0;
4481
  pthread_cond_signal(&active->cond);        // wake up a new syncer
4482
  pthread_mutex_unlock(&LOCK_sync);
4483
  return err;
4484
}
4485
4486
/**
4487
  erase xid from the page, update page free space counters/pointers.
4488
  cookie points directly to the memory where xid was logged.
4489
*/
4490
77.1.45 by Monty Taylor
Warning fixes.
4491
void TC_LOG_MMAP::unlog(ulong cookie, my_xid xid __attribute__((__unused__)))
{
  /* The cookie is a byte offset into the mapping: derive page and slot. */
  PAGE   *page= pages + (cookie / tc_log_page_size);
  my_xid *slot= (my_xid *)(data + cookie);

  DBUG_ASSERT(*slot == xid);
  DBUG_ASSERT(slot >= page->start && slot < page->end);

  /* Erase the xid; this is done before the page lock is taken. */
  *slot= 0;

  pthread_mutex_lock(&page->lock);
  page->free++;
  DBUG_ASSERT(page->free <= page->size);

  /* Let the next search for an empty slot start no later than this one. */
  set_if_smaller(page->ptr, slot);

  if (page->free == page->size)         // the page is completely empty
    statistic_decrement(tc_log_cur_pages_used, &LOCK_status);

  if (page->waiters == 0)               // nobody is waiting on this page
    pthread_cond_signal(&COND_pool);    // ping ... for overflow()

  pthread_mutex_unlock(&page->lock);
}
4510
4511
void TC_LOG_MMAP::close()
4512
{
4513
  uint i;
4514
  switch (inited) {
4515
  case 6:
4516
    pthread_mutex_destroy(&LOCK_sync);
4517
    pthread_mutex_destroy(&LOCK_active);
4518
    pthread_mutex_destroy(&LOCK_pool);
4519
    pthread_cond_destroy(&COND_pool);
4520
  case 5:
4521
    data[0]='A'; // garble the first (signature) byte, in case my_delete fails
4522
  case 4:
4523
    for (i=0; i < npages; i++)
4524
    {
4525
      if (pages[i].ptr == 0)
4526
        break;
4527
      pthread_mutex_destroy(&pages[i].lock);
4528
      pthread_cond_destroy(&pages[i].cond);
4529
    }
4530
  case 3:
4531
    my_free((uchar*)pages, MYF(0));
4532
  case 2:
4533
    my_munmap((char*)data, (size_t)file_length);
4534
  case 1:
4535
    my_close(fd, MYF(0));
4536
  }
4537
  if (inited>=5) // cannot do in the switch because of Windows
4538
    my_delete(logname, MYF(MY_WME));
4539
  inited=0;
4540
}
4541
4542
int TC_LOG_MMAP::recover()
4543
{
4544
  HASH xids;
4545
  PAGE *p=pages, *end_p=pages+npages;
4546
4547
  if (memcmp(data, tc_log_magic, sizeof(tc_log_magic)))
4548
  {
4549
    sql_print_error("Bad magic header in tc log");
4550
    goto err1;
4551
  }
4552
4553
  /*
4554
    the first byte after magic signature is set to current
4555
    number of storage engines on startup
4556
  */
4557
  if (data[sizeof(tc_log_magic)] != total_ha_2pc)
4558
  {
4559
    sql_print_error("Recovery failed! You must enable "
4560
                    "exactly %d storage engines that support "
4561
                    "two-phase commit protocol",
4562
                    data[sizeof(tc_log_magic)]);
4563
    goto err1;
4564
  }
4565
4566
  if (hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
4567
                sizeof(my_xid), 0, 0, MYF(0)))
4568
    goto err1;
4569
4570
  for ( ; p < end_p ; p++)
4571
  {
4572
    for (my_xid *x=p->start; x < p->end; x++)
4573
      if (*x && my_hash_insert(&xids, (uchar *)x))
4574
        goto err2; // OOM
4575
  }
4576
4577
  if (ha_recover(&xids))
4578
    goto err2;
4579
4580
  hash_free(&xids);
4581
  bzero(data, (size_t)file_length);
4582
  return 0;
4583
4584
err2:
4585
  hash_free(&xids);
4586
err1:
4587
  sql_print_error("Crash recovery failed. Either correct the problem "
4588
                  "(if it's, for example, out of memory error) and restart, "
4589
                  "or delete tc log and start mysqld with "
4590
                  "--tc-heuristic-recover={commit|rollback}");
4591
  return 1;
4592
}
4593
#endif
4594
4595
TC_LOG *tc_log;
4596
TC_LOG_DUMMY tc_log_dummy;
4597
TC_LOG_MMAP  tc_log_mmap;
4598
4599
/**
4600
  Perform heuristic recovery, if --tc-heuristic-recover was used.
4601
4602
  @note
4603
    no matter whether heuristic recovery was successful or not
4604
    mysqld must exit. So, return value is the same in both cases.
4605
4606
  @retval
4607
    0	no heuristic recovery was requested
4608
  @retval
4609
    1   heuristic recovery was performed
4610
*/
4611
4612
int TC_LOG::using_heuristic_recover()
4613
{
4614
  if (!tc_heuristic_recover)
4615
    return 0;
4616
4617
  sql_print_information("Heuristic crash recovery mode");
4618
  if (ha_recover(0))
4619
    sql_print_error("Heuristic crash recovery failed");
4620
  sql_print_information("Please restart mysqld without --tc-heuristic-recover");
4621
  return 1;
4622
}
4623
4624
/****** transaction coordinator log for 2pc - binlog() based solution ******/
4625
#define TC_LOG_BINLOG MYSQL_BIN_LOG
4626
4627
/**
4628
  @todo
4629
  keep in-memory list of prepared transactions
4630
  (add to list in log(), remove on unlog())
4631
  and copy it to the new binlog if rotated
4632
  but let's check the behaviour of tc_log_page_waits first!
4633
*/
4634
4635
int TC_LOG_BINLOG::open(const char *opt_name)
4636
{
4637
  LOG_INFO log_info;
4638
  int      error= 1;
4639
4640
  DBUG_ASSERT(total_ha_2pc > 1);
4641
  DBUG_ASSERT(opt_name && opt_name[0]);
4642
4643
  pthread_mutex_init(&LOCK_prep_xids, MY_MUTEX_INIT_FAST);
4644
  pthread_cond_init (&COND_prep_xids, 0);
4645
4646
  if (!my_b_inited(&index_file))
4647
  {
4648
    /* There was a failure to open the index file, can't open the binlog */
4649
    cleanup();
4650
    return 1;
4651
  }
4652
4653
  if (using_heuristic_recover())
4654
  {
4655
    /* generate a new binlog to mask a corrupted one */
4656
    open(opt_name, LOG_BIN, 0, WRITE_CACHE, 0, max_binlog_size, 0);
4657
    cleanup();
4658
    return 1;
4659
  }
4660
4661
  if ((error= find_log_pos(&log_info, NullS, 1)))
4662
  {
4663
    if (error != LOG_INFO_EOF)
4664
      sql_print_error("find_log_pos() failed (error: %d)", error);
4665
    else
4666
      error= 0;
4667
    goto err;
4668
  }
4669
4670
  {
4671
    const char *errmsg;
4672
    IO_CACHE    log;
4673
    File        file;
4674
    Log_event  *ev=0;
4675
    Format_description_log_event fdle(BINLOG_VERSION);
4676
    char        log_name[FN_REFLEN];
4677
4678
    if (! fdle.is_valid())
4679
      goto err;
4680
4681
    do
4682
    {
4683
      strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
4684
    } while (!(error= find_next_log(&log_info, 1)));
4685
4686
    if (error !=  LOG_INFO_EOF)
4687
    {
4688
      sql_print_error("find_log_pos() failed (error: %d)", error);
4689
      goto err;
4690
    }
4691
4692
    if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
4693
    {
4694
      sql_print_error("%s", errmsg);
4695
      goto err;
4696
    }
4697
4698
    if ((ev= Log_event::read_log_event(&log, 0, &fdle)) &&
4699
        ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
4700
        ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
4701
    {
4702
      sql_print_information("Recovering after a crash using %s", opt_name);
4703
      error= recover(&log, (Format_description_log_event *)ev);
4704
    }
4705
    else
4706
      error=0;
4707
4708
    delete ev;
4709
    end_io_cache(&log);
4710
    my_close(file, MYF(MY_WME));
4711
4712
    if (error)
4713
      goto err;
4714
  }
4715
4716
err:
4717
  return error;
4718
}
4719
4720
/** This is called on shutdown, after ha_panic. */
4721
void TC_LOG_BINLOG::close()
4722
{
4723
  DBUG_ASSERT(prepared_xids==0);
4724
  pthread_mutex_destroy(&LOCK_prep_xids);
4725
  pthread_cond_destroy (&COND_prep_xids);
4726
}
4727
4728
/**
4729
  @todo
4730
  group commit
4731
4732
  @retval
4733
    0    error
4734
  @retval
4735
    1    success
4736
*/
4737
int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
{
  DBUG_ENTER("TC_LOG_BINLOG::log");

  Xid_log_event xid_event(thd, xid);
  binlog_trx_data *trx=
    (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);

  /*
    Writing an XID always ends the whole transaction (last argument TRUE).
    The result is negated before it is returned.
  */
  const int end_result= binlog_end_trans(thd, trx, &xid_event, TRUE);
  DBUG_RETURN(!end_result);
}
4749
77.1.45 by Monty Taylor
Warning fixes.
4750
void TC_LOG_BINLOG::unlog(ulong cookie __attribute__((__unused__)),
4751
                          my_xid xid __attribute__((__unused__)))
1 by brian
clean slate
4752
{
4753
  pthread_mutex_lock(&LOCK_prep_xids);
4754
  DBUG_ASSERT(prepared_xids > 0);
4755
  if (--prepared_xids == 0) {
4756
    DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids));
4757
    pthread_cond_signal(&COND_prep_xids);
4758
  }
4759
  pthread_mutex_unlock(&LOCK_prep_xids);
4760
  rotate_and_purge(0);     // as ::write() did not rotate
4761
}
4762
4763
/**
  Collect the XIDs recorded in a binary log and hand them to ha_recover().

  Reads events from @p log until reading fails or an invalid event is
  seen, stores a copy of the xid of every XID_EVENT in a hash, and then
  calls ha_recover() with that hash.

  @param log    IO cache to read events from
  @param fdle   format description event used to decode the events; its
                LOG_EVENT_BINLOG_IN_USE_F flag is cleared here

  @retval
    0    success
  @retval
    1    failure (an error message is written to the error log)
*/
int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
{
  Log_event  *ev= NULL;
  HASH xids;
  MEM_ROOT mem_root;

  if (! fdle->is_valid() ||
      hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
                sizeof(my_xid), 0, 0, MYF(0)))
    goto err1;

  init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE);

  fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error

  while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid())
  {
    if (ev->get_type_code() == XID_EVENT)
    {
      Xid_log_event *xev=(Xid_log_event *)ev;
      uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
                                      sizeof(xev->xid));
      /*
        A failed copy or a failed insert would silently drop this XID
        from the recovery set, so treat either as a recovery failure.
      */
      if (! x || my_hash_insert(&xids, x))
        goto err2;
    }
    delete ev;
    ev= NULL;
  }

  /* The loop may stop on a non-NULL but invalid event; do not leak it. */
  delete ev;
  ev= NULL;

  if (ha_recover(&xids))
    goto err2;

  free_root(&mem_root, MYF(0));
  hash_free(&xids);
  return 0;

err2:
  delete ev;                        // event in flight when the error hit
  free_root(&mem_root, MYF(0));
  hash_free(&xids);
err1:
  sql_print_error("Crash recovery failed. Either correct the problem "
                  "(if it's, for example, out of memory error) and restart, "
                  "or delete (or rename) binary log and start mysqld with "
                  "--tc-heuristic-recover={commit|rollback}");
  return 1;
}
4809
4810
4811
#ifdef INNODB_COMPATIBILITY_HOOKS
4812
/**
4813
  Get the file name of the MySQL binlog.
4814
  @return the name of the binlog file
4815
*/
4816
extern "C"
4817
const char* mysql_bin_log_file_name(void)
4818
{
4819
  return mysql_bin_log.get_log_fname();
4820
}
4821
/**
4822
  Get the current position of the MySQL binlog.
4823
  @return byte offset from the beginning of the binlog
4824
*/
4825
extern "C"
4826
ulonglong mysql_bin_log_file_pos(void)
4827
{
4828
  return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
4829
}
4830
#endif /* INNODB_COMPATIBILITY_HOOKS */
4831
4832
4833
struct st_mysql_storage_engine binlog_storage_engine=
4834
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
4835
4836
mysql_declare_plugin(binlog)
4837
{
4838
  MYSQL_STORAGE_ENGINE_PLUGIN,
4839
  &binlog_storage_engine,
4840
  "binlog",
4841
  "MySQL AB",
4842
  "This is a pseudo storage engine to represent the binlog in a transaction",
4843
  PLUGIN_LICENSE_GPL,
4844
  binlog_init, /* Plugin Init */
4845
  NULL, /* Plugin Deinit */
4846
  0x0100 /* 1.0 */,
4847
  NULL,                       /* status variables                */
4848
  NULL,                       /* system variables                */
4849
  NULL                        /* config options                  */
4850
}
4851
mysql_declare_plugin_end;