1
/* Copyright (C) 2000-2003 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
24
Abort logging when we get an error in reading or writing log files
27
#include "mysql_priv.h"
29
#include "rpl_filter.h"
34
#include <m_ctype.h> // For test_if_number
36
#include <mysql/plugin.h>
38
/* max size of the log message */
39
#define MAX_LOG_BUFFER_SIZE 1024
40
#define MAX_USER_HOST_SIZE 512
41
#define MAX_TIME_SIZE 32
42
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
44
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
48
// Global binary log object.
MYSQL_BIN_LOG mysql_bin_log;
49
// NOTE(review): presumably counts binlog writes between syncs; its users are
// not visible in this chunk -- confirm.
ulong sync_binlog_counter= 0;
51
// Forward declarations: the number-parsing helper used by
// find_uniq_filename(), followed by the binlog handlerton callbacks that
// binlog_init() installs.
static bool test_if_number(const char *str,
52
long *res, bool allow_wildcards);
53
static int binlog_init(void *p);
54
static int binlog_close_connection(handlerton *hton, THD *thd);
55
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
56
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
57
static int binlog_commit(handlerton *hton, THD *thd, bool all);
58
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
59
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
62
Silence all errors and warnings reported when performing a write
64
Errors and warnings are not reported to the client or SQL exception
65
handlers, so that the presence of logging does not interfere and affect
66
the logic of an application.
68
// Internal error handler used to silence errors raised while writing logs.
// It keeps a copy of the handled error message in m_message.
class Silence_log_table_errors : public Internal_error_handler
70
// Buffer holding the text stored by handle_error().
char m_message[MYSQL_ERRMSG_SIZE];
72
Silence_log_table_errors()
77
virtual ~Silence_log_table_errors() {}
79
// Error callback; the out-of-class definition stores the message text.
virtual bool handle_error(uint sql_errno, const char *message,
80
MYSQL_ERROR::enum_warning_level level,
82
// Accessor for the stored message text.
const char *message() const { return m_message; }
86
// Definition of the error callback: keeps a copy of the reported message.
// The error number and level parameters are unnamed (their names are
// commented out) and are not used in the visible part of the body.
Silence_log_table_errors::handle_error(uint /* sql_errno */,
87
const char *message_arg,
88
MYSQL_ERROR::enum_warning_level /* level */,
91
// Copy message_arg into m_message; the length limit passed is one less than
// the buffer size (presumably leaving room for the terminator -- confirm
// against the strmake() contract).
strmake(m_message, message_arg, sizeof(m_message)-1);
96
// Table of three message-printing callbacks. Only the sql_print_information
// entry is visible in this fragment.
sql_print_message_func sql_print_message_handlers[3] =
98
sql_print_information,
104
/**
  Build a default log file name derived from the pid file name.

  Copies pidfile_name into buff (length limit FN_REFLEN-5), then lets
  fn_format() rewrite buff in place with mysql_data_home as the directory
  argument and log_ext as the extension, using the flags
  MY_UNPACK_FILENAME and MY_REPLACE_EXT.

  @param buff     Output buffer (assumes room for FN_REFLEN bytes -- TODO confirm).
  @param log_ext  Extension handed to fn_format().

  @return Whatever fn_format() returns for buff.
*/
char *make_default_log_name(char *buff,const char* log_ext)
106
strmake(buff, pidfile_name, FN_REFLEN-5);
107
return fn_format(buff, buff, mysql_data_home, log_ext,
108
MYF(MY_UNPACK_FILENAME|MY_REPLACE_EXT));
112
Helper class to hold a mutex for the duration of the
115
Eliminates the need for explicit unlocking of mutexes on, e.g.,
116
error returns. On passing a null pointer, the sentry will not do
122
// Constructor: receives the mutex this sentry is responsible for.
Mutex_sentry(pthread_mutex_t *mutex)
126
// Acquire the mutex handed to the constructor.
pthread_mutex_lock(mutex);
132
// Release the stored mutex (presumably from the destructor; the enclosing
// function header is not visible in this fragment).
pthread_mutex_unlock(m_mutex);
139
// The mutex managed by this sentry.
pthread_mutex_t *m_mutex;
141
// It's not allowed to copy this object in any way
142
Mutex_sentry(Mutex_sentry const&);
143
void operator=(Mutex_sentry const&);
147
Helper class to store binary log transaction data.
149
// Holder of the binary log transaction state attached to a THD: the
// transaction cache (trans_log), the pending rows event, and the cache
// position recorded before the current statement.
class binlog_trx_data {
152
: at_least_one_stmt(0), m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF)
154
// Cap the cache at the configured maximum binlog cache size.
trans_log.end_of_file= max_binlog_cache_size;
159
// No rows event may be pending when the cache file is closed.
DBUG_ASSERT(pending() == NULL);
160
close_cached_file(&trans_log);
163
// Current position in the transaction cache.
my_off_t position() const {
164
return my_b_tell(&trans_log);
169
// Empty means: no pending event and cache position still at zero.
return pending() == NULL && my_b_tell(&trans_log) == 0;
173
Truncate the transaction cache to a certain position. This
174
includes deleting the pending event.
176
void truncate(my_off_t pos)
178
DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
179
// Trace the saved statement-start position itself; the target position is
// already reported by the trace line above.
DBUG_PRINT("info", ("before_stmt_pos=%lu", (ulong) before_stmt_pos));
182
reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0);
183
// A statement start beyond the new end of the cache is no longer valid.
if (pos < before_stmt_pos)
184
before_stmt_pos= MY_OFF_T_UNDEF;
187
The only valid positions that can be truncated to are at the
188
beginning of a statement. We are relying on this fact to be able
189
to set the at_least_one_stmt flag correctly. In other words, if
190
we are truncating to the beginning of the transaction cache,
191
there will be no statements in the cache, otherwise, we will
192
have at least one statement in the transaction cache.
194
at_least_one_stmt= (pos > 0);
198
Reset the entire contents of the transaction cache, emptying it
204
before_stmt_pos= MY_OFF_T_UNDEF;
205
trans_log.end_of_file= max_binlog_cache_size;
208
Rows_log_event *pending() const
213
void set_pending(Rows_log_event *const pending)
218
IO_CACHE trans_log; // The transaction cache
221
Boolean that is true if there is at least one statement in the
224
bool at_least_one_stmt;
228
Pending binrows event. This event is the event where the rows are
231
Rows_log_event *m_pending;
235
Binlog position before the start of the current statement.
237
my_off_t before_stmt_pos;
240
// Handlerton used for the binary log; assigned in binlog_init().
handlerton *binlog_hton;
243
/* Check if a given table is opened log table */
244
int check_if_log_table(uint db_len, const char *db, uint table_name_len,
245
const char *table_name, uint check_if_opened)
250
/* log event handlers */
252
// Error-log entry point of the file handler: forwards level, format and args
// to vprint_msg_to_log() and returns its result.
bool Log_to_file_event_handler::
253
log_error(enum loglevel level, const char *format,
256
return vprint_msg_to_log(level, format, args);
259
// Initializes the pthread objects of both file logs owned by this handler:
// the general log (mysql_log) and the slow log (mysql_slow_log).
void Log_to_file_event_handler::init_pthread_objects()
261
mysql_log.init_pthread_objects();
262
mysql_slow_log.init_pthread_objects();
266
/** Wrapper around MYSQL_LOG::write() for slow log. */
268
// Slow-log entry point of the file handler. Every argument is passed through
// unchanged to mysql_slow_log.write(), whose result is returned.
bool Log_to_file_event_handler::
269
log_slow(THD *thd, time_t current_time, time_t query_start_arg,
270
const char *user_host, uint user_host_len,
271
ulonglong query_utime, ulonglong lock_utime, bool is_command,
272
const char *sql_text, uint sql_text_len)
274
return mysql_slow_log.write(thd, current_time, query_start_arg,
275
user_host, user_host_len,
276
query_utime, lock_utime, is_command,
277
sql_text, sql_text_len);
282
Wrapper around MYSQL_LOG::write() for general log. We need it since we
283
want all log event handlers to have the same signature.
286
// General-log entry point of the file handler. Forwards to mysql_log.write()
// and returns its result. Note that thd and client_cs are accepted but are
// not passed on in the visible call.
bool Log_to_file_event_handler::
287
log_general(THD *thd, time_t event_time, const char *user_host,
288
uint user_host_len, int thread_id,
289
const char *command_type, uint command_type_len,
290
const char *sql_text, uint sql_text_len,
291
CHARSET_INFO *client_cs)
293
return mysql_log.write(event_time, user_host, user_host_len,
294
thread_id, command_type, command_type_len,
295
sql_text, sql_text_len);
299
// Opens the slow log and the general log at the paths held in the
// corresponding system variables, then marks the handler as initialized.
// NOTE(review): any conditions guarding the two open calls, and the return
// value, are not visible in this fragment.
bool Log_to_file_event_handler::init()
304
mysql_slow_log.open_slow_log(sys_var_slow_log_path.value);
307
mysql_log.open_query_log(sys_var_general_log_path.value);
309
is_initialized= TRUE;
316
// Releases file-log resources. The visible part cleans up the slow log;
// other statements of the body may be missing from this fragment.
void Log_to_file_event_handler::cleanup()
319
mysql_slow_log.cleanup();
322
// Reopens the general log file and the slow log file.
// NOTE(review): conditions guarding each reopen are not visible here.
void Log_to_file_event_handler::flush()
324
/* reopen log files */
326
mysql_log.reopen_file();
328
mysql_slow_log.reopen_file();
332
Log error with all enabled log event handlers
337
level The level of the error significance: NOTE,
339
format format string for the error message
340
args list of arguments for the format string
347
// Sends one error-log message to every handler in error_log_handler_list
// (a null-terminated array) and accumulates their results into 'error'.
bool LOGGER::error_log_print(enum loglevel level, const char *format,
351
Log_event_handler **current_handler;
353
/* currently we don't need locking here as there is no error_log table */
354
for (current_handler= error_log_handler_list ; *current_handler ;)
355
// The handler call is the left operand of ||, so it is made for every
// handler even after an earlier one has reported an error.
error= (*current_handler++)->log_error(level, format, args) || error;
361
// First cleanup stage: destroys the logger rwlock and, if a file log handler
// exists, asks it to clean up. Asserts that the logger was initialized.
void LOGGER::cleanup_base()
363
DBUG_ASSERT(inited == 1);
364
rwlock_destroy(&LOCK_logger);
365
if (file_log_handler)
366
file_log_handler->cleanup();
370
// Final cleanup stage: deletes the file log handler object if one exists.
// Asserts that the logger was initialized.
void LOGGER::cleanup_end()
372
DBUG_ASSERT(inited == 1);
373
if (file_log_handler)
374
delete file_log_handler;
379
Perform basic log initialization: create file-based log handler and
382
// Basic logger setup: creates the file log handler if needed, selects file
// output for the error log, initializes the handler's pthread objects and
// the logger rwlock. Asserts that the logger is not yet initialized.
void LOGGER::init_base()
384
DBUG_ASSERT(inited == 0);
388
Here we create file log handler. We don't do it for the table log handler
389
here as it cannot be created so early. The reason is THD initialization,
390
which depends on the system variables (parsed later).
392
if (!file_log_handler)
393
file_log_handler= new Log_to_file_event_handler;
395
/* by default we use traditional error log */
396
init_error_log(LOG_FILE);
398
file_log_handler->init_pthread_objects();
399
my_rwlock_init(&LOCK_logger, NULL);
403
// Flushes the logs: takes the logger lock exclusively and has the file log
// handler reopen its files. The unlock and return statements are not visible
// in this fragment.
bool LOGGER::flush_logs(THD *thd)
408
Now we lock logger, as nobody should be able to use logging routines while
409
log tables are closed
411
logger.lock_exclusive();
413
/* reopen log files */
414
file_log_handler->flush();
416
/* end of log flush */
423
Log slow query with all enabled log event handlers
428
thd THD of the query being logged
429
query The query being logged
430
query_length The length of the query string
431
current_utime Current time in microseconds (from undefined start)
438
// Builds the user/host string and timing values for a slow query and passes
// them to every handler in slow_log_handler_list, accumulating the handlers'
// results into 'error'.
bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
439
ulonglong current_utime)
443
Log_event_handler **current_handler;
444
bool is_command= FALSE;
445
char user_host_buff[MAX_USER_HOST_SIZE];
446
Security_context *sctx= thd->security_ctx;
447
uint user_host_len= 0;
448
ulonglong query_utime, lock_utime;
451
Print the message to the buffer if we have slow log enabled
454
// Only do the work when at least one slow-log handler is installed.
if (*slow_log_handler_list)
458
/* do not log slow queries from replication threads */
459
if (thd->slave_thread && !opt_log_slow_slave_statements)
469
/* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
470
// Null security-context fields are replaced by empty strings.
// NOTE(review): the length limit equals the full buffer size; confirm that
// strxnmov() never writes a terminator past that many bytes.
user_host_len= (strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
471
sctx->priv_user ? sctx->priv_user : "", "[",
472
sctx->user ? sctx->user : "", "] @ ",
473
sctx->host ? sctx->host : "", " [",
474
sctx->ip ? sctx->ip : "", "]", NullS) -
477
current_time= my_time_possible_from_micro(current_utime);
478
// With a recorded start time, both durations are measured from it;
// the other visible assignment sets both to zero.
if (thd->start_utime)
480
query_utime= (current_utime - thd->start_utime);
481
lock_utime= (thd->utime_after_lock - thd->start_utime);
485
query_utime= lock_utime= 0;
491
// Use the command name as the logged text (the guarding condition is not
// visible in this fragment).
query= command_name[thd->command].str;
492
query_length= command_name[thd->command].length;
495
for (current_handler= slow_log_handler_list; *current_handler ;)
496
error= (*current_handler++)->log_slow(thd, current_time, thd->start_time,
497
user_host_buff, user_host_len,
498
query_utime, lock_utime, is_command,
499
query, query_length) || error;
506
// Writes one general-log record through every handler in
// general_log_handler_list: builds the user/host string, takes the current
// time and passes the command name and client character set along.
bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
507
const char *query, uint query_length)
510
Log_event_handler **current_handler= general_log_handler_list;
511
char user_host_buff[MAX_USER_HOST_SIZE];
512
Security_context *sctx= thd->security_ctx;
514
uint user_host_len= 0;
518
id= thd->thread_id; /* Normal thread */
520
id= 0; /* Log from connect handler */
528
// Same user/host pieces as in slow_log_print(); null fields become "".
// NOTE(review): the length limit equals the full buffer size; confirm that
// strxnmov() never writes a terminator past that many bytes.
user_host_len= strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
529
sctx->priv_user ? sctx->priv_user : "", "[",
530
sctx->user ? sctx->user : "", "] @ ",
531
sctx->host ? sctx->host : "", " [",
532
sctx->ip ? sctx->ip : "", "]", NullS) -
535
current_time= my_time(0);
537
// Call each handler in turn; 'error' accumulates any reported failure.
while (*current_handler)
538
error|= (*current_handler++)->
539
log_general(thd, current_time, user_host_buff,
541
command_name[(uint) command].str,
542
command_name[(uint) command].length,
544
thd->variables.character_set_client) || error;
550
// printf-style front end to general_log_write(): formats the message into a
// local buffer of MAX_LOG_BUFFER_SIZE bytes and writes it to the general log.
bool LOGGER::general_log_print(THD *thd, enum enum_server_command command,
551
const char *format, va_list args)
553
uint message_buff_len= 0;
554
char message_buff[MAX_LOG_BUFFER_SIZE];
556
/* prepare message */
558
message_buff_len= my_vsnprintf(message_buff, sizeof(message_buff),
561
// Alternative path: start with an empty message (the condition selecting
// between the two paths is not visible in this fragment).
message_buff[0]= '\0';
563
return general_log_write(thd, command, message_buff, message_buff_len);
566
// Fills error_log_handler_list according to the error_log_printer bitmask.
// With LOG_NONE set the list is made empty; the visible switch case installs
// the file handler followed by the null terminator.
void LOGGER::init_error_log(uint error_log_printer)
568
if (error_log_printer & LOG_NONE)
570
error_log_handler_list[0]= 0;
574
switch (error_log_printer) {
576
error_log_handler_list[0]= file_log_handler;
577
error_log_handler_list[1]= 0;
579
/* these two are disabled for now */
583
case LOG_TABLE|LOG_FILE:
589
// Fills slow_log_handler_list: empty when LOG_NONE is set in
// slow_log_printer, otherwise the file handler followed by the null
// terminator.
void LOGGER::init_slow_log(uint slow_log_printer)
591
if (slow_log_printer & LOG_NONE)
593
slow_log_handler_list[0]= 0;
597
slow_log_handler_list[0]= file_log_handler;
598
slow_log_handler_list[1]= 0;
601
// Fills general_log_handler_list: empty when LOG_NONE is set in
// general_log_printer, otherwise the file handler followed by the null
// terminator.
void LOGGER::init_general_log(uint general_log_printer)
603
if (general_log_printer & LOG_NONE)
605
general_log_handler_list[0]= 0;
609
general_log_handler_list[0]= file_log_handler;
610
general_log_handler_list[1]= 0;
614
// Turns on the slow or the general query log, selected by log_type: fetches
// the matching file log object, opens it at the path from the system
// variable and rebuilds the handler list from log_output_options.
// NOTE(review): the switch header, the slow-log case label and the locking
// are not visible in this fragment.
bool LOGGER::activate_log_handler(THD* thd, uint log_type)
616
MYSQL_QUERY_LOG *file_log;
623
file_log= file_log_handler->get_mysql_slow_log();
625
file_log->open_slow_log(sys_var_slow_log_path.value);
626
init_slow_log(log_output_options);
630
case QUERY_LOG_GENERAL:
633
file_log= file_log_handler->get_mysql_log();
635
file_log->open_query_log(sys_var_general_log_path.value);
636
init_general_log(log_output_options);
648
// Turns off the slow or the general query log, selected by log_type. The
// visible part picks the option flag and the file log object to act on; the
// statements that close the log are not visible in this fragment.
void LOGGER::deactivate_log_handler(THD *thd, uint log_type)
655
tmp_opt= &opt_slow_log;
656
file_log= file_log_handler->get_mysql_slow_log();
658
case QUERY_LOG_GENERAL:
660
file_log= file_log_handler->get_mysql_log();
663
assert(0); // Impossible
675
// Rebuilds all three handler lists (error, slow, general) from the given
// output bitmasks by calling the matching init_*_log() helpers.
int LOGGER::set_handlers(uint error_log_printer,
676
uint slow_log_printer,
677
uint general_log_printer)
679
/* error log table is not supported yet */
680
DBUG_ASSERT(error_log_printer < LOG_TABLE);
684
init_error_log(error_log_printer);
685
init_slow_log(slow_log_printer);
686
init_general_log(general_log_printer);
695
Save position of binary log transaction cache.
698
binlog_trans_log_savepos()
700
thd The thread to take the binlog data from
701
pos Pointer to variable where the position will be stored
705
Save the current position in the binary log transaction cache into
706
the variable pointed to by 'pos'
710
// Stores the current transaction-cache position of 'thd' into *pos.
binlog_trans_log_savepos(THD *thd, my_off_t *pos)
712
DBUG_ENTER("binlog_trans_log_savepos");
713
DBUG_ASSERT(pos != NULL);
714
// Create the per-thread binlog transaction data on first use.
if (thd_get_ha_data(thd, binlog_hton) == NULL)
715
thd->binlog_setup_trx_data();
716
binlog_trx_data *const trx_data=
717
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
718
DBUG_ASSERT(mysql_bin_log.is_open());
719
*pos= trx_data->position();
720
DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos));
726
Truncate the binary log transaction cache.
729
binlog_trans_log_truncate()
731
thd The thread to take the binlog data from
732
pos Position to truncate to
736
Truncate the binary log to the given position. Will not change
741
// Truncates the transaction cache of 'thd' to position 'pos'. The
// per-thread binlog transaction data must already exist and 'pos' must not
// be the all-ones "undefined" value (both are asserted).
binlog_trans_log_truncate(THD *thd, my_off_t pos)
743
DBUG_ENTER("binlog_trans_log_truncate");
744
DBUG_PRINT("enter", ("pos: %lu", (ulong) pos));
746
DBUG_ASSERT(thd_get_ha_data(thd, binlog_hton) != NULL);
747
/* Only true if binlog_trans_log_savepos() wasn't called before */
748
DBUG_ASSERT(pos != ~(my_off_t) 0);
750
binlog_trx_data *const trx_data=
751
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
752
trx_data->truncate(pos);
758
this function is mostly a placeholder.
759
conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
760
should be moved here.
763
// Plugin init hook: records the handlerton passed in 'p' in binlog_hton and
// fills in its state, type, savepoint size, callbacks and flags.
int binlog_init(void *p)
765
binlog_hton= (handlerton *)p;
766
// Reported as enabled only when binary logging is switched on.
binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
767
binlog_hton->db_type=DB_TYPE_BINLOG;
768
// A savepoint stores one my_off_t: the transaction-cache position written
// by binlog_savepoint_set().
binlog_hton->savepoint_offset= sizeof(my_off_t);
769
binlog_hton->close_connection= binlog_close_connection;
770
binlog_hton->savepoint_set= binlog_savepoint_set;
771
binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
772
binlog_hton->commit= binlog_commit;
773
binlog_hton->rollback= binlog_rollback;
774
binlog_hton->prepare= binlog_prepare;
775
binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
779
// Connection-close callback: detaches the binlog transaction data from the
// THD and destroys it. The data is expected to be empty (asserted).
static int binlog_close_connection(handlerton *hton, THD *thd)
781
binlog_trx_data *const trx_data=
782
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
783
DBUG_ASSERT(trx_data->empty());
784
thd_set_ha_data(thd, binlog_hton, NULL);
785
// The destructor is invoked explicitly and the storage is released with
// my_free() rather than delete.
trx_data->~binlog_trx_data();
786
my_free((uchar*)trx_data, MYF(0));
796
thd The thread whose transaction should be ended
797
trx_data Pointer to the transaction data to use
798
end_ev The end event to use, or NULL
799
all True if the entire transaction should be ended, false if
800
only the statement transaction should be ended.
804
End the currently open transaction. The transaction can be either
805
a real transaction (if 'all' is true) or a statement transaction
808
If 'end_ev' is NULL, the transaction is a rollback of only
809
transactional tables, so the transaction cache will be truncated
810
to either just before the last opened statement transaction (if
811
'all' is false), or reset completely (if 'all' is true).
814
binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
815
Log_event *end_ev, bool all)
817
DBUG_ENTER("binlog_end_trans");
819
IO_CACHE *trans_log= &trx_data->trans_log;
820
DBUG_PRINT("enter", ("transaction: %s end_ev: 0x%lx",
821
all ? "all" : "stmt", (long) end_ev));
822
DBUG_PRINT("info", ("thd->options={ %s%s}",
823
FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
824
FLAGSTR(thd->options, OPTION_BEGIN)));
827
NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of
828
only transactional tables. If the transaction contains changes to
829
any non-transactional tables, we need to write the transaction and log
835
Doing a commit or a rollback including non-transactional tables,
836
i.e., ending a transaction where we might write the transaction
837
cache to the binary log.
839
We can always end the statement when ending a transaction since
840
transactions are not allowed inside stored functions. If they
841
were, we would have to ensure that we're not ending a statement
842
inside a stored function.
844
thd->binlog_flush_pending_rows_event(TRUE);
846
error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev);
850
We need to step the table map version after writing the
851
transaction cache to disk.
853
mysql_bin_log.update_table_map_version();
854
statistic_increment(binlog_cache_use, &LOCK_status);
855
if (trans_log->disk_writes != 0)
857
statistic_increment(binlog_cache_disk_use, &LOCK_status);
858
trans_log->disk_writes= 0;
864
If rolling back an entire transaction or a single statement not
865
inside a transaction, we reset the transaction cache.
867
If rolling back a statement in a transaction, we truncate the
868
transaction cache to remove the statement.
870
if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
874
DBUG_ASSERT(!thd->binlog_get_pending_rows_event());
875
thd->clear_binlog_table_maps();
878
trx_data->truncate(trx_data->before_stmt_pos);
881
We need to step the table map version on a rollback to ensure
882
that a new table map event is generated instead of the one that
883
was written to the thrown-away transaction cache.
885
mysql_bin_log.update_table_map_version();
891
// Prepare callback installed in the binlog handlerton by binlog_init().
// Per the notes inside the body, it only pretends to support two-phase
// commit; the actual work happens in MYSQL_BIN_LOG::log_xid().
static int binlog_prepare(handlerton *hton, THD *thd, bool all)
895
just pretend we can do 2pc, so that MySQL won't
897
real work will be done in MYSQL_BIN_LOG::log_xid()
902
#define YESNO(X) ((X) ? "yes" : "no")
905
This function is called once after each statement.
907
It has the responsibility to flush the transaction cache to the
908
binlog file on commits.
910
@param hton The binlog handlerton.
911
@param thd The client thread that executes the transaction.
912
@param all This is @c true if this is a real transaction commit, and
915
@see handlerton::commit
917
static int binlog_commit(handlerton *hton, THD *thd, bool all)
919
DBUG_ENTER("binlog_commit");
920
binlog_trx_data *const trx_data=
921
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
923
if (trx_data->empty())
925
// we're here because trans_log was flushed in MYSQL_BIN_LOG::log_xid()
931
Decision table for committing a transaction. The top part, the
932
*conditions* represent different cases that can occur, and the
933
bottom part, the *actions*, represent what should be done in that
936
Real transaction 'all' was true
938
Statement in cache There were at least one statement in the
941
In transaction We are inside a transaction
943
Stmt modified non-trans The statement being committed modified a
944
non-transactional table
946
All modified non-trans Some statement before this one in the
947
transaction modified a non-transactional
951
============================= = = = = = = = = = = = = = = = =
952
Real transaction N N N N N N N N N N N N N N N N
953
Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y
954
In transaction N N N N Y Y Y Y N N N N Y Y Y Y
955
Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y
956
All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y
958
Action: (C)ommit/(A)ccumulate C C - C A C - C - - - - A A - A
959
============================= = = = = = = = = = = = = = = = =
962
============================= = = = = = = = = = = = = = = = =
963
Real transaction Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y
964
Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y
965
In transaction N N N N Y Y Y Y N N N N Y Y Y Y
966
Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y
967
All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y
969
(C)ommit/(A)ccumulate/(-) - - - - C C - C - - - - C C - C
970
============================= = = = = = = = = = = = = = = = =
972
In other words, we commit the transaction if and only if either of
973
the following are true:
974
- We are not in a transaction and committing a statement
976
- We are in a transaction and one (or more) of the following are
979
- A full transaction is committed
983
- A non-transactional statement is committed and there is
986
Otherwise, we accumulate the statement
988
ulonglong const in_transaction=
989
thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);
991
("all: %d, empty: %s, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
993
YESNO(trx_data->empty()),
994
YESNO(in_transaction),
995
YESNO(thd->transaction.all.modified_non_trans_table),
996
YESNO(thd->transaction.stmt.modified_non_trans_table)));
997
if ((in_transaction && (all || (!trx_data->at_least_one_stmt && thd->transaction.stmt.modified_non_trans_table))) || (!in_transaction && !all))
999
Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, FALSE);
1000
qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE)
1001
int error= binlog_end_trans(thd, trx_data, &qev, all);
1008
This function is called when a transaction involving a transactional
1009
table is rolled back.
1011
It has the responsibility to flush the transaction cache to the
1012
binlog file. However, if the transaction does not involve
1013
non-transactional tables, nothing needs to be logged.
1015
@param hton The binlog handlerton.
1016
@param thd The client thread that executes the transaction.
1017
@param all This is @c true if this is a real transaction rollback, and
1020
@see handlerton::rollback
1022
// Rollback callback installed in the binlog handlerton. Depending on whether
// non-transactional tables were modified (or OPTION_KEEP_LOG is set), it
// either ends the transaction with a ROLLBACK query event or ends it with no
// event at all.
static int binlog_rollback(handlerton *hton, THD *thd, bool all)
1024
DBUG_ENTER("binlog_rollback");
1026
binlog_trx_data *const trx_data=
1027
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
1029
// Empty-cache case (its body is not visible in this fragment).
if (trx_data->empty()) {
1034
DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
1036
YESNO(thd->transaction.all.modified_non_trans_table),
1037
YESNO(thd->transaction.stmt.modified_non_trans_table)));
1038
// Use the transaction-level flag for a full rollback and the
// statement-level flag for a statement rollback.
if ((all && thd->transaction.all.modified_non_trans_table) ||
1039
(!all && thd->transaction.stmt.modified_non_trans_table) ||
1040
(thd->options & OPTION_KEEP_LOG))
1043
We write the transaction cache with a rollback last if we have
1044
modified any non-transactional table. We do this even if we are
1045
committing a single statement that has modified a
1046
non-transactional table since it can have modified a
1047
transactional table in that statement as well, which needs to be
1048
rolled back on the slave.
1050
Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, FALSE);
1051
qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE)
1052
error= binlog_end_trans(thd, trx_data, &qev, all);
1054
else if ((all && !thd->transaction.all.modified_non_trans_table) ||
1055
(!all && !thd->transaction.stmt.modified_non_trans_table))
1058
If we have modified only transactional tables, we can truncate
1059
the transaction cache without writing anything to the binary
1062
// Passing 0 as the end event requests the "no event" rollback path.
error= binlog_end_trans(thd, trx_data, 0, all);
1069
How do we handle this (unlikely but legal) case:
1071
[transaction] + [update to non-trans table] + [rollback to savepoint] ?
1073
The problem occurs when a savepoint is before the update to the
1074
non-transactional table. Then when there's a rollback to the savepoint, if we
1075
simply truncate the binlog cache, we lose the part of the binlog cache where
1076
the update is. If we want to not lose it, we need to write the SAVEPOINT
1077
command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
1078
is easy: it's just a write at the end of the binlog cache, but the former
1079
should be *inserted* to the place where the user called SAVEPOINT. The
1080
solution is that when the user calls SAVEPOINT, we write it to the binlog
1081
cache (so no need to later insert it). As transactions are never intermixed
1082
in the binary log (i.e. they are serialized), we won't have conflicts with
1083
savepoint names when using mysqlbinlog or in the slave SQL thread.
1084
Then when ROLLBACK TO SAVEPOINT is called, if we updated some
1085
non-transactional table, we don't truncate the binlog cache but instead write
1086
ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
1087
will chop the SAVEPOINT command from the binlog cache, which is good as in
1088
that case there is no need to have it in the binlog).
1091
// Savepoint-set callback: stores the current transaction-cache position in
// the savepoint area 'sv' (treated as a my_off_t) and logs the current
// statement text of the thread as a query event.
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
1093
DBUG_ENTER("binlog_savepoint_set");
1095
binlog_trans_log_savepos(thd, (my_off_t*) sv);
1096
/* Write it to the binary log */
1099
thd->binlog_query(THD::STMT_QUERY_TYPE,
1100
thd->query, thd->query_length, TRUE, FALSE);
1104
// Savepoint-rollback callback. If a non-transactional table was modified in
// the transaction (or OPTION_KEEP_LOG is set), the current statement text is
// logged as a query event; the other visible path truncates the transaction
// cache back to the position saved in 'sv'.
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
1106
DBUG_ENTER("binlog_savepoint_rollback");
1109
Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
1110
non-transactional table. Otherwise, truncate the binlog cache starting
1111
from the SAVEPOINT command.
1113
if (unlikely(thd->transaction.all.modified_non_trans_table ||
1114
(thd->options & OPTION_KEEP_LOG)))
1117
thd->binlog_query(THD::STMT_QUERY_TYPE,
1118
thd->query, thd->query_length, TRUE, FALSE);
1121
// 'sv' holds the my_off_t written by binlog_savepoint_set().
binlog_trans_log_truncate(thd, *(my_off_t*)sv);
1126
// Reads the header bytes at the start of 'log' and compares them with
// BINLOG_MAGIC. On a read failure or a mismatch, *errmsg is set to a
// description of the problem. The cache must be positioned at offset 0
// (asserted).
int check_binlog_magic(IO_CACHE* log, const char** errmsg)
1129
DBUG_ASSERT(my_b_tell(log) == 0);
1131
if (my_b_read(log, (uchar*) magic, sizeof(magic)))
1133
*errmsg = "I/O error reading the header from the binary log";
1134
sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
1138
if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
1140
*errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
1147
// Opens the binary log file 'log_file_name' read-only, attaches the read
// cache 'log' to it and verifies the magic header. On the visible failure
// paths an error is printed and *errmsg is set; the file is closed in the
// visible cleanup code.
File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
1150
DBUG_ENTER("open_binlog");
1152
if ((file = my_open(log_file_name, O_RDONLY | O_BINARY | O_SHARE,
1155
sql_print_error("Failed to open log (file '%s', errno %d)",
1156
log_file_name, my_errno);
1157
*errmsg = "Could not open log file";
1160
if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
1161
MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
1163
sql_print_error("Failed to create a cache on log (file '%s')",
1165
*errmsg = "Could not open log file";
1168
if (check_binlog_magic(log,errmsg))
1175
my_close(file,MYF(0));
1183
Find a unique filename for 'filename.#'.
1185
Set '#' to a number as low as possible.
1188
nonzero if not possible to get unique filename
1191
// Appends a numeric extension to 'name' that is one higher than the largest
// numeric extension already used by files with the same base name in the
// same directory.
static int find_uniq_filename(char *name)
1195
char buff[FN_REFLEN];
1196
struct st_my_dir *dir_info;
1197
register struct fileinfo *file_info;
1199
size_t buf_length, length;
1201
DBUG_ENTER("find_uniq_filename");
1203
// Split off the directory part into buff; 'start' points at the file name
// portion inside 'name'.
length= dirname_part(buff, name, &buf_length);
1204
start= name + length;
1208
// Length of the prefix that directory entries are compared against.
length= (size_t) (end-start+1);
1210
if (!(dir_info = my_dir(buff,MYF(MY_DONT_SORT))))
1211
{ // This shouldn't happen
1212
strmov(end,".1"); // use name+1
1215
file_info= dir_info->dir_entry;
1216
for (i=dir_info->number_off_files ; i-- ; file_info++)
1218
// Entry matches the prefix and the remainder parses as a number:
// remember the largest number seen.
if (bcmp((uchar*) file_info->name, (uchar*) start, length) == 0 &&
1219
test_if_number(file_info->name+length, &number,0))
1221
set_if_bigger(max_found,(ulong) number);
1224
my_dirend(dir_info);
1227
// NOTE(review): the format is signed (ld) while the value is compared as
// ulong above; the declaration of max_found is not visible here -- confirm.
sprintf(end,"%06ld",max_found+1);
1232
// Records the log type and the IO_CACHE type that later operations on this
// log object use.
void MYSQL_LOG::init(enum_log_type log_type_arg,
1233
enum cache_type io_cache_type_arg)
1235
DBUG_ENTER("MYSQL_LOG::init");
1236
log_type= log_type_arg;
1237
io_cache_type= io_cache_type_arg;
1238
DBUG_PRINT("info",("log_type: %d", log_type));
1244
Open a (new) log file.
1249
log_name The name of the log to open
1250
log_type_arg The type of the log. E.g. LOG_NORMAL
1251
new_name The new name for the logfile. This is only needed
1252
when the method is used to open the binlog file.
1253
io_cache_type_arg The type of the IO_CACHE to use for this log file
1256
Open the logfile, init IO_CACHE and write startup messages
1257
(in case of general and slow query logs).
1264
// Opens the log file: stores a copy of the log name, determines the file
// name (new_name or a generated one), opens the file with flags that depend
// on the cache and log type, attaches the IO_CACHE and, for LOG_NORMAL,
// writes a startup header. The visible failure path prints an error, closes
// the file, ends the cache and marks the log closed.
bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
1265
const char *new_name, enum cache_type io_cache_type_arg)
1267
char buff[FN_REFLEN];
1269
int open_flags= O_CREAT | O_BINARY;
1270
DBUG_ENTER("MYSQL_LOG::open");
1271
DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg));
1275
init(log_type_arg, io_cache_type_arg);
1277
if (!(name= my_strdup(log_name, MYF(MY_WME))))
1279
name= (char *)log_name; // for the error message
1284
strmov(log_file_name, new_name);
1285
else if (generate_new_name(log_file_name, name))
1288
// Append-style caches need read/write access; otherwise the file is opened
// write-only, with O_APPEND for every log type except LOG_BIN.
if (io_cache_type == SEQ_READ_APPEND)
1289
open_flags |= O_RDWR | O_APPEND;
1291
open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
1295
// The cache starts at the current file position as reported by my_tell().
if ((file= my_open(log_file_name, open_flags,
1296
MYF(MY_WME | ME_WAITTANG))) < 0 ||
1297
init_io_cache(&log_file, file, IO_SIZE, io_cache_type,
1298
my_tell(file, MYF(MY_WME)), 0,
1299
MYF(MY_WME | MY_NABP |
1300
((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
1303
if (log_type == LOG_NORMAL)
1306
int len=my_snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
1307
"started with:\nTCP Port: %d, Named Pipe: %s\n",
1308
my_progname, server_version, MYSQL_COMPILATION_COMMENT,
1309
mysqld_port, mysqld_unix_port
1311
end= strnmov(buff + len, "Time Id Command Argument\n",
1312
sizeof(buff) - len);
1313
if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) ||
1314
flush_io_cache(&log_file))
1318
log_state= LOG_OPENED;
1322
sql_print_error("Could not use %s for logging (error %d). \
1323
Turning logging off for the whole duration of the MySQL server process. \
1324
To turn it on again: fix the cause, \
1325
shutdown the MySQL server and restart it.", name, errno);
1327
my_close(file, MYF(0));
1328
end_io_cache(&log_file);
1330
log_state= LOG_CLOSED;
1334
// Constructor: starts with no name, no write error, not inited, unknown log
// type and closed state, and zero-fills the IO_CACHE.
MYSQL_LOG::MYSQL_LOG()
1335
: name(0), write_error(FALSE), inited(FALSE), log_type(LOG_UNKNOWN),
1336
log_state(LOG_CLOSED)
1339
We don't want to initialize LOCK_Log here as such initialization depends on
1340
safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
1341
called only in main(). Doing initialization here would make it happen
1344
bzero((char*) &log_file, sizeof(log_file));
1347
// Initializes the LOCK_log mutex. Must be called before the object is
// marked inited (asserted).
void MYSQL_LOG::init_pthread_objects()
1349
DBUG_ASSERT(inited == 0);
1351
(void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
1359
exiting Bitmask. For the slow and general logs the only used bit is
1360
LOG_CLOSE_TO_BE_OPENED. This is used if we intend to call
1361
open at once after close.
1364
One can do an open on the object at once after doing a close.
1365
The internal structures are not freed until cleanup() is called
1368
// Closes the log if it is open: ends the IO_CACHE, syncs and closes the
// file, then sets the new state depending on the 'exiting' bitmask.
void MYSQL_LOG::close(uint exiting)
1369
{ // One can't set log_type here!
1370
DBUG_ENTER("MYSQL_LOG::close");
1371
DBUG_PRINT("enter",("exiting: %d", (int) exiting));
1372
if (log_state == LOG_OPENED)
1374
end_io_cache(&log_file);
1376
// Sync and close failures are reported only if no write error has been
// recorded already.
if (my_sync(log_file.file, MYF(MY_WME)) && ! write_error)
1379
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1382
if (my_close(log_file.file, MYF(MY_WME)) && ! write_error)
1385
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1389
// LOG_CLOSE_TO_BE_OPENED keeps the log in a "to be opened" state instead of
// fully closed.
log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
1394
/** This is called only once. */
1396
/* Final teardown; the visible code destroys the LOCK_log mutex. */
void MYSQL_LOG::cleanup()
1398
DBUG_ENTER("cleanup");
1402
(void) pthread_mutex_destroy(&LOCK_log);
1409
/*
  Build the full log file name into new_name from log_name and
  mysql_data_home (fn_format with flag value 4).
  For LOG_BIN logs whose log_name has no extension, find_uniq_filename() is
  asked to complete new_name; its failure is reported with
  ER_NO_UNIQUE_LOGFILE.
*/
int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name)
1411
fn_format(new_name, log_name, mysql_data_home, "", 4);
1412
if (log_type == LOG_BIN)
1414
/* fn_ext() points at the extension; an empty one means none was supplied. */
if (!fn_ext(log_name)[0])
1416
if (find_uniq_filename(new_name))
1418
sql_print_error(ER(ER_NO_UNIQUE_LOGFILE), log_name);
1434
Reopen the log file. The method is used during FLUSH LOGS
1435
and locks LOCK_log mutex
1439
/*
  Close the query log and open it again under the same name, holding
  LOCK_log for the whole close/open sequence.
  NOTE(review): the DBUG_ENTER tag below says "MYSQL_LOG" although this is a
  MYSQL_QUERY_LOG method - confirm whether that is intentional.
*/
void MYSQL_QUERY_LOG::reopen_file()
1443
DBUG_ENTER("MYSQL_LOG::reopen_file");
1446
DBUG_PRINT("info",("log is closed"));
1450
pthread_mutex_lock(&LOCK_log);
1453
/* name is cleared before close(); save_name is what gets reopened and then freed. */
name= 0; // Don't free name
1454
close(LOG_CLOSE_TO_BE_OPENED);
1457
Note that at this point, log_state != LOG_CLOSED (important for is_open()).
1460
open(save_name, log_type, 0, io_cache_type);
1461
my_free(save_name, MYF(0));
1463
pthread_mutex_unlock(&LOCK_log);
1470
Write a command to traditional general log file
1475
event_time command start timestamp
1476
user_host the pointer to the string with user@host info
1477
user_host_len length of the user_host string. this is computed once
1478
and passed to all general log event handlers
1479
thread_id Id of the thread, issued a query
1480
command_type the type of the command being logged
1481
command_type_len the length of the string above
1482
sql_text the very text of the query being executed
1483
sql_text_len the length of sql_text string
1487
Log given command to normal (not rotatable) log file
1491
TRUE - error occurred
1494
/*
  Write one general-log record to log_file while holding LOCK_log.

  Pieces written by the visible code: a "YYMMDD HH:MM:SS" timestamp when
  event_time differs from last_time (a two-tab filler is written on the
  other path), the thread id formatted with "%5ld ", command_type, a tab,
  sql_text and a newline, followed by a flush of the IO cache.
  A failed write is reported with ER_ERROR_ON_WRITE.

  Fix: the "\t\t" filler write is now tested for any non-zero result, like
  every other my_b_write() call in this function; the former "< 0" test
  ignored a positive error return.
*/
bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host,
1495
uint user_host_len, int thread_id,
1496
const char *command_type, uint command_type_len,
1497
const char *sql_text, uint sql_text_len)
1501
char local_time_buff[MAX_TIME_SIZE];
1503
uint time_buff_len= 0;
1505
(void) pthread_mutex_lock(&LOCK_log);
1507
/* Test if someone closed between the is_open test and lock */
1510
/* Note that my_b_write() assumes it knows the length for this */
1511
if (event_time != last_time)
1513
last_time= event_time;
1515
localtime_r(&event_time, &start);
1517
time_buff_len= my_snprintf(local_time_buff, MAX_TIME_SIZE,
1518
"%02d%02d%02d %2d:%02d:%02d",
1519
start.tm_year % 100, start.tm_mon + 1,
1520
start.tm_mday, start.tm_hour,
1521
start.tm_min, start.tm_sec);
1523
if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
1527
if (my_b_write(&log_file, (uchar*) "\t\t" ,2))
1530
/* command_type, thread_id */
1531
length= my_snprintf(buff, 32, "%5ld ", (long) thread_id);
1533
if (my_b_write(&log_file, (uchar*) buff, length))
1536
if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
1539
if (my_b_write(&log_file, (uchar*) "\t", 1))
1543
if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
1546
if (my_b_write(&log_file, (uchar*) "\n", 1) ||
1547
flush_io_cache(&log_file))
1551
(void) pthread_mutex_unlock(&LOCK_log);
1558
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1560
(void) pthread_mutex_unlock(&LOCK_log);
1566
Log a query to the traditional slow log file
1571
thd THD of the query
1572
current_time current timestamp
1573
query_start_arg command start timestamp
1574
user_host the pointer to the string with user@host info
1575
user_host_len length of the user_host string. this is computed once
1576
and passed to all general log event handlers
1577
query_utime Amount of time the query took to execute (in microseconds)
1578
lock_utime Amount of time the query was locked (in microseconds)
1579
is_command The flag, which determines, whether the sql_text is a
1580
query or an administrator command.
1581
sql_text the very text of the query or administrator command
1583
sql_text_len the length of sql_text string
1587
Log a query to the slow log file.
1591
TRUE - error occurred
1594
/*
  Write one slow-log record to log_file while holding LOCK_log.

  Pieces written by the visible code, in order:
    - "# Time: ..." line, only without SPECIAL_SHORT_LOG_FORMAT and only
      when current_time differs from last_time;
    - "# User@Host: " followed by user_host;
    - "# Query_time: ... Lock_time: ... Rows_sent: ... Rows_examined: ..."
      (both times converted from microseconds to seconds with 6 decimals);
    - "use <db>;" when thd->db differs from the remembered db;
    - a "SET ..." line assembled in buff (last_insert_id, insert_id,
      timestamp);
    - the "# administrator command: " prefix on a branch whose condition
      is not visible here;
    - sql_text terminated by ";\n", then a flush of the IO cache.
  A failure is reported with ER_ERROR_ON_WRITE.

  Fix: the first localtime_r() argument had been corrupted by an HTML
  entity decoding of the "&curren" prefix; it is restored to
  &current_time.
*/
bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
1595
time_t query_start_arg, const char *user_host,
1596
uint user_host_len, ulonglong query_utime,
1597
ulonglong lock_utime, bool is_command,
1598
const char *sql_text, uint sql_text_len)
1601
DBUG_ENTER("MYSQL_QUERY_LOG::write");
1603
(void) pthread_mutex_lock(&LOCK_log);
1607
(void) pthread_mutex_unlock(&LOCK_log);
1612
{ // Safety against reopen
1614
char buff[80], *end;
1615
char query_time_buff[22+7], lock_time_buff[22+7];
1619
if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
1621
if (current_time != last_time)
1623
last_time= current_time;
1625
localtime_r(&current_time, &start);
1627
buff_len= my_snprintf(buff, sizeof buff,
1628
"# Time: %02d%02d%02d %2d:%02d:%02d\n",
1629
start.tm_year % 100, start.tm_mon + 1,
1630
start.tm_mday, start.tm_hour,
1631
start.tm_min, start.tm_sec);
1633
/* Note that my_b_write() assumes it knows the length for this */
1634
if (my_b_write(&log_file, (uchar*) buff, buff_len))
1637
const uchar uh[]= "# User@Host: ";
1638
if (my_b_write(&log_file, uh, sizeof(uh) - 1))
1640
if (my_b_write(&log_file, (uchar*) user_host, user_host_len))
1642
if (my_b_write(&log_file, (uchar*) "\n", 1))
1645
/* For slow query log */
1646
sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
1647
sprintf(lock_time_buff, "%.6f", ulonglong2double(lock_utime)/1000000.0);
1648
if (my_b_printf(&log_file,
1649
"# Query_time: %s Lock_time: %s"
1650
" Rows_sent: %lu Rows_examined: %lu\n",
1651
query_time_buff, lock_time_buff,
1652
(ulong) thd->sent_row_count,
1653
(ulong) thd->examined_row_count) == (uint) -1)
1655
if (thd->db && strcmp(thd->db, db))
1656
{ // Database changed
1657
if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1)
1661
if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
1663
end=strmov(end, ",last_insert_id=");
1664
end=longlong10_to_str((longlong)
1665
thd->first_successful_insert_id_in_prev_stmt_for_binlog,
1668
// Save value if we do an insert.
1669
if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
1671
if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
1673
end=strmov(end,",insert_id=");
1674
end=longlong10_to_str((longlong)
1675
thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
1681
This info used to show up randomly, depending on whether the query
1682
checked the query start time or not. now we always write current
1683
timestamp to the slow log
1685
end= strmov(end, ",timestamp=");
1686
end= int10_to_str((long) current_time, end, 10);
1692
/* Each item appended to buff starts with ','; buff + 1 skips the first one after "SET ". */
if (my_b_write(&log_file, (uchar*) "SET ", 4) ||
1693
my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff)))
1698
end= strxmov(buff, "# administrator command: ", NullS);
1699
buff_len= (ulong) (end - buff);
1700
my_b_write(&log_file, (uchar*) buff, buff_len);
1702
if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) ||
1703
my_b_write(&log_file, (uchar*) ";\n",2) ||
1704
flush_io_cache(&log_file))
1712
sql_print_error(ER(ER_ERROR_ON_WRITE), name, error);
1716
(void) pthread_mutex_unlock(&LOCK_log);
1723
The following should be using fn_format(); We just need to
1724
first change fn_format() to cut the file name if it's too long.
1726
/*
  Produce the log file name to use.

  - With an empty or NULL log_name, the name is derived from pidfile_name:
    it is copied into buff and fn_format() replaces its directory and
    extension, using 'suffix' as the new extension.
  - Otherwise, on the visible strip path, log_name is copied into buff up to
    the position returned by fn_ext(), i.e. without its extension.

  Fix: strmake() appends a terminating NUL after the copied bytes, so the
  copy length is capped at FN_REFLEN-1 (it was FN_REFLEN), in line with the
  other strmake() bounds used in this file.
  NOTE(review): assumes buff holds FN_REFLEN bytes - confirm against callers.
*/
const char *MYSQL_LOG::generate_name(const char *log_name,
1728
bool strip_ext, char *buff)
1730
if (!log_name || !log_name[0])
1732
strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
1733
return (const char *)
1734
fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
1736
// get rid of extension if the log is binary to avoid problems
1739
char *p= fn_ext(log_name);
1740
uint length= (uint) (p - log_name);
1741
strmake(buff, log_name, min(length, FN_REFLEN-1));
1742
return (const char*)buff;
1749
/*
  Default constructor: zeroes the counters, sets file_id and open_count to 1,
  requests a start event (need_start_event), clears both description event
  pointers, empties index_file_name and zero-fills index_file.
  Locks are deliberately not initialised here (see the note below).
*/
MYSQL_BIN_LOG::MYSQL_BIN_LOG()
1750
:bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
1751
need_start_event(TRUE), m_table_map_version(0),
1752
description_event_for_exec(0), description_event_for_queue(0)
1755
We don't want to initialize locks here as such initialization depends on
1756
safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
1757
called only in main(). Doing initialization here would make it happen
1760
index_file_name[0] = 0;
1761
bzero((char*) &index_file, sizeof(index_file));
1764
/* this is called only once */
1766
/*
  Final teardown: closes the log together with its index
  (LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT), deletes both description events
  and destroys LOCK_log, LOCK_index and update_cond.
*/
void MYSQL_BIN_LOG::cleanup()
1768
DBUG_ENTER("cleanup");
1772
close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
1773
delete description_event_for_queue;
1774
delete description_event_for_exec;
1775
(void) pthread_mutex_destroy(&LOCK_log);
1776
(void) pthread_mutex_destroy(&LOCK_index);
1777
(void) pthread_cond_destroy(&update_cond);
1783
/* Init binlog-specific vars */
1784
/* Store the two binlog settings: no_auto_events and max_size. */
void MYSQL_BIN_LOG::init(bool no_auto_events_arg, ulong max_size_arg)
1786
DBUG_ENTER("MYSQL_BIN_LOG::init");
1787
no_auto_events= no_auto_events_arg;
1788
max_size= max_size_arg;
1789
DBUG_PRINT("info",("max_size: %lu", max_size));
1794
/*
  Initialise LOCK_log, LOCK_index and update_cond.
  DBUG_ASSERT checks that 'inited' is still 0 when this is called.
*/
void MYSQL_BIN_LOG::init_pthread_objects()
1796
DBUG_ASSERT(inited == 0);
1798
(void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
1799
(void) pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW);
1800
(void) pthread_cond_init(&update_cond, 0);
1804
/*
  Open (creating if needed) the binlog index file and attach the index_file
  IO cache to it as a WRITE_CACHE positioned at the end of the file.

  When index_file_name_arg is NULL, log_name is used as the base name and
  MY_REPLACE_EXT is added to the fn_format() options.
  If my_open(), my_sync() or init_io_cache() fails, the descriptor is closed
  again when it had been opened.
*/
bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
1805
const char *log_name)
1807
File index_file_nr= -1;
1808
DBUG_ASSERT(!my_b_inited(&index_file));
1811
First open of this class instance
1812
Create an index file that will hold all file names uses for logging.
1813
Add new entries to the end of it.
1815
myf opt= MY_UNPACK_FILENAME;
1816
if (!index_file_name_arg)
1818
index_file_name_arg= log_name; // Use same basename for index file
1819
opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
1821
fn_format(index_file_name, index_file_name_arg, mysql_data_home,
1823
/* Open, sync and wrap in an IO cache; any failure takes the cleanup path below. */
if ((index_file_nr= my_open(index_file_name,
1824
O_RDWR | O_CREAT | O_BINARY ,
1825
MYF(MY_WME))) < 0 ||
1826
my_sync(index_file_nr, MYF(MY_WME)) ||
1827
init_io_cache(&index_file, index_file_nr,
1828
IO_SIZE, WRITE_CACHE,
1829
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
1830
0, MYF(MY_WME | MY_WAIT_IF_FULL)))
1833
TODO: all operations creating/deleting the index file or a log, should
1834
call my_sync_dir() or my_sync_dir_by_file() to be durable.
1835
TODO: file creation should be done with my_create() not my_open().
1837
if (index_file_nr >= 0)
1838
my_close(index_file_nr,MYF(0));
1846
Open a (new) binlog file.
1848
- Open the log file and the index file. Register the new
1850
- When calling this when the file is in use, you must hold locks
1851
on LOCK_log and LOCK_index.
1859
/*
  Open a binary (or relay) log file and register it in the index file.

  Visible sequence:
    1. MYSQL_LOG::open() opens the main log file; init() stores
       no_auto_events_arg and max_size_arg.
    2. If the file is empty, BINLOG_MAGIC is written and the file is marked
       for insertion into the index file.
    3. The index cache is switched to WRITE_CACHE at the end of the file.
    4. Unless suppressed, a Format_description_log_event is written; for
       WRITE_CACHE logs it carries LOG_EVENT_BINLOG_IN_USE_F.
    5. For relay logs with a version >= 4 description_event_for_queue, that
       event is written too (created= 0, artificial_event= 1).
    6. The log file is flushed and synced; the file name is appended to the
       index file, which is flushed and synced as well.
  On failure an error is logged, the file and both IO caches are released
  and log_state becomes LOG_CLOSED.
*/
bool MYSQL_BIN_LOG::open(const char *log_name,
1860
enum_log_type log_type_arg,
1861
const char *new_name,
1862
enum cache_type io_cache_type_arg,
1863
bool no_auto_events_arg,
1865
bool null_created_arg)
1868
DBUG_ENTER("MYSQL_BIN_LOG::open");
1869
DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
1873
/* open the main log file */
1874
if (MYSQL_LOG::open(log_name, log_type_arg, new_name, io_cache_type_arg))
1875
DBUG_RETURN(1); /* all warnings issued */
1877
init(no_auto_events_arg, max_size_arg);
1881
DBUG_ASSERT(log_type == LOG_BIN);
1884
bool write_file_name_to_index_file=0;
1886
if (!my_b_filelength(&log_file))
1889
The binary log file was empty (probably newly created)
1890
This is the normal case and happens when the user doesn't specify
1891
an extension for the binary log files.
1892
In this case we write a standard header to it.
1894
if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
1895
BIN_LOG_HEADER_SIZE))
1897
bytes_written+= BIN_LOG_HEADER_SIZE;
1898
write_file_name_to_index_file= 1;
1901
DBUG_ASSERT(my_b_inited(&index_file) != 0);
1902
reinit_io_cache(&index_file, WRITE_CACHE,
1903
my_b_filelength(&index_file), 0, 0);
1904
if (need_start_event && !no_auto_events)
1907
In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event
1908
even if this is not the very first binlog.
1910
Format_description_log_event s(BINLOG_VERSION);
1912
don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
1913
as we won't be able to reset it later
1915
if (io_cache_type == WRITE_CACHE)
1916
s.flags|= LOG_EVENT_BINLOG_IN_USE_F;
1919
s.dont_set_created= null_created_arg;
1920
if (s.write(&log_file))
1922
bytes_written+= s.data_written;
1924
if (description_event_for_queue &&
1925
description_event_for_queue->binlog_version>=4)
1928
This is a relay log written to by the I/O slave thread.
1929
Write the event so that others can later know the format of this relay
1931
Note that this event is very close to the original event from the
1932
master (it has binlog version of the master, event types of the
1933
master), so this is suitable to parse the next relay log's event. It
1934
has been produced by
1935
Format_description_log_event::Format_description_log_event(char* buf,).
1936
Why don't we want to write the description_event_for_queue if this
1937
event is for format<4 (3.23 or 4.x): this is because in that case, the
1938
description_event_for_queue describes the data received from the
1939
master, but not the data written to the relay log (*conversion*),
1940
which is in format 4 (slave's).
1943
Set 'created' to 0, so that in next relay logs this event does not
1944
trigger cleaning actions on the slave in
1945
Format_description_log_event::apply_event_impl().
1947
description_event_for_queue->created= 0;
1948
/* Don't set log_pos in event header */
1949
description_event_for_queue->artificial_event=1;
1951
if (description_event_for_queue->write(&log_file))
1953
bytes_written+= description_event_for_queue->data_written;
1955
if (flush_io_cache(&log_file) ||
1956
my_sync(log_file.file, MYF(MY_WME)))
1959
if (write_file_name_to_index_file)
1962
As this is a new log file, we write the file name to the index
1963
file. As every time we write to the index file, we sync it.
1965
if (my_b_write(&index_file, (uchar*) log_file_name,
1966
strlen(log_file_name)) ||
1967
my_b_write(&index_file, (uchar*) "\n", 1) ||
1968
flush_io_cache(&index_file) ||
1969
my_sync(index_file.file, MYF(MY_WME)))
1973
log_state= LOG_OPENED;
1978
sql_print_error("Could not use %s for logging (error %d). \
1979
Turning logging off for the whole duration of the MySQL server process. \
1980
To turn it on again: fix the cause, \
1981
shutdown the MySQL server and restart it.", name, errno);
1983
my_close(file,MYF(0));
1984
end_io_cache(&log_file);
1985
end_io_cache(&index_file);
1987
log_state= LOG_CLOSED;
1992
/* Locked variant: calls raw_get_current_log() while holding LOCK_log. */
int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
1994
pthread_mutex_lock(&LOCK_log);
1995
int ret = raw_get_current_log(linfo);
1996
pthread_mutex_unlock(&LOCK_log);
2000
/*
  Copy the current log file name (truncated to fit linfo->log_file_name) and
  the current position of log_file into linfo. Takes no lock itself.
*/
int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
2002
strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
2003
linfo->pos = my_b_tell(&log_file);
2008
Move all data up in a file (used on the filename index file).
2010
We do the copy outside of the IO_CACHE as the cache buffers would just
2011
make things slower and more complicated.
2012
In most cases the copy loop should only do one read.
2014
@param index_file File to move
2015
@param offset Move everything from here to beginning
2018
File will be truncated to be 'offset' shorter or filled up with newlines
2024
#ifdef HAVE_REPLICATION
2026
/*
  Shift the contents of the file behind index_file towards its beginning by
  'offset' bytes, working directly on the file descriptor.

  Each iteration reads up to sizeof(io_buf) bytes at 'offset' and writes
  them back at offset - init_offset. Afterwards my_chsize() sets the file
  length to the shifted size (using '\n' as fill character), the file is
  synced and the IO cache is reset to a READ_CACHE at position 0.
*/
static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
2029
my_off_t init_offset= offset;
2030
File file= index_file->file;
2031
uchar io_buf[IO_SIZE*2];
2032
DBUG_ENTER("copy_up_file_and_fill");
2034
for (;; offset+= bytes_read)
2036
(void) my_seek(file, offset, MY_SEEK_SET, MYF(0));
2037
if ((bytes_read= (int) my_read(file, io_buf, sizeof(io_buf), MYF(MY_WME)))
2041
break; // end of file
2042
/* Write the chunk back init_offset bytes earlier than where it was read. */
(void) my_seek(file, offset-init_offset, MY_SEEK_SET, MYF(0));
2043
if (my_write(file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
2046
/* The following will either truncate the file or fill the end with \n' */
2047
if (my_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) ||
2048
my_sync(file, MYF(MY_WME)))
2051
/* Reset data in old index cache */
2052
reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
2059
#endif /* HAVE_REPLICATION */
2062
Find the position in the log-index-file for the given log name.
2064
@param linfo Store here the found log file name and position to
2065
the NEXT log file name in the index file.
2066
@param log_name Filename to find in the index file.
2067
Is a null pointer if we want to read the first entry
2068
@param need_lock Set this to 1 if the parent doesn't already have a
2072
On systems without the truncate function the file will end with one or
2073
more empty lines. These will be ignored when reading the file.
2078
LOG_INFO_EOF End of log-index-file found
2080
LOG_INFO_IO Got IO error while reading file
2083
/*
  Scan the index file from its start for the line equal to log_name
  (a NULL log_name matches the first entry).

  On a match the trailing newline is stripped from linfo->log_file_name,
  linfo->index_file_start_offset is set to the start of the matching line
  and linfo->index_file_offset to the position just after it.
  A read of 0 or 1 characters ends the scan with LOG_INFO_EOF, or
  LOG_INFO_IO when the cache reports an error.
*/
int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
2087
char *fname= linfo->log_file_name;
2088
uint log_name_len= log_name ? (uint) strlen(log_name) : 0;
2089
DBUG_ENTER("find_log_pos");
2090
DBUG_PRINT("enter",("log_name: %s", log_name ? log_name : "NULL"));
2093
Mutex needed because we need to make sure the file pointer does not
2094
move from under our feet
2097
pthread_mutex_lock(&LOCK_index);
2098
safe_mutex_assert_owner(&LOCK_index);
2100
/* As the file is flushed, we can't get an error here */
2101
(void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
2106
my_off_t offset= my_b_tell(&index_file);
2107
/* If we get 0 or 1 characters, this is the end of the file */
2109
if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
2111
/* Did not find the given entry; Return not found or error */
2112
error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
2116
// if the log entry matches, null string matching anything
2118
/* Exact match: same length once the newline is excluded, and identical bytes. */
(log_name_len == length-1 && fname[log_name_len] == '\n' &&
2119
!memcmp(fname, log_name, log_name_len)))
2121
DBUG_PRINT("info",("Found log file entry"));
2122
fname[length-1]=0; // remove last \n
2123
linfo->index_file_start_offset= offset;
2124
linfo->index_file_offset = my_b_tell(&index_file);
2130
pthread_mutex_unlock(&LOCK_index);
2136
Find the position in the log-index-file for the given log name.
2139
linfo Store here the next log file name and position to
2140
the file name after that.
2142
need_lock Set this to 1 if the parent doesn't already have a
2146
- Before calling this function, one has to call find_log_pos()
2148
- Mutex needed because we need to make sure the file pointer does not move
2154
LOG_INFO_EOF End of log-index-file found
2156
LOG_INFO_IO Got IO error while reading file
2159
/*
  Read the index-file entry that starts at linfo->index_file_offset.

  linfo->index_file_start_offset is set to that position, the entry (without
  its trailing newline) is stored in linfo->log_file_name and
  linfo->index_file_offset is advanced past it.
  A read of 0 or 1 characters yields LOG_INFO_EOF, or LOG_INFO_IO when the
  cache reports an error.
*/
int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
2163
char *fname= linfo->log_file_name;
2166
pthread_mutex_lock(&LOCK_index);
2167
safe_mutex_assert_owner(&LOCK_index);
2169
/* As the file is flushed, we can't get an error here */
2170
(void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
2173
linfo->index_file_start_offset= linfo->index_file_offset;
2174
if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
2176
error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
2179
fname[length-1]=0; // kill \n
2180
linfo->index_file_offset = my_b_tell(&index_file);
2184
pthread_mutex_unlock(&LOCK_index);
2190
Delete all logs referred to in the index file.
2191
Start writing to a new log file.
2193
The new index file will only contain this file.
2198
If not called from slave thread, write start event to new log
2206
/*
  Delete every log file listed in the index file, delete the index file
  itself and reopen a fresh index and log under the saved name.

  Lock order taken here: LOCK_log, LOCK_index, LOCK_thread_count; released
  in reverse order.
  A missing file (ENOENT) only raises an ER_LOG_PURGE_NO_FILE warning; the
  other branch raises ER_BINLOG_PURGE_FATAL_ERR at error level.
*/
bool MYSQL_BIN_LOG::reset_logs(THD* thd)
2210
const char* save_name;
2211
DBUG_ENTER("reset_logs");
2215
We need to get both locks to be sure that no one is trying to
2216
write to the index log file.
2218
pthread_mutex_lock(&LOCK_log);
2219
pthread_mutex_lock(&LOCK_index);
2222
The following mutex is needed to ensure that no threads call
2223
'delete thd' as we would then risk missing a 'rollback' from this
2224
thread. If the transaction involved MyISAM tables, it should go
2225
into binlog even on rollback.
2227
VOID(pthread_mutex_lock(&LOCK_thread_count));
2229
/* Save variables so that we can reopen the log */
2231
name=0; // Protect against free
2232
close(LOG_CLOSE_TO_BE_OPENED);
2234
/* First delete all old log files */
2236
if (find_log_pos(&linfo, NullS, 0))
2244
if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
2246
if (my_errno == ENOENT)
2248
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2249
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2250
linfo.log_file_name);
2251
sql_print_information("Failed to delete file '%s'",
2252
linfo.log_file_name);
2258
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2259
ER_BINLOG_PURGE_FATAL_ERR,
2260
"a problem with deleting %s; "
2261
"consider examining correspondence "
2262
"of your binlog index file "
2263
"to the actual binlog files",
2264
linfo.log_file_name);
2269
if (find_next_log(&linfo, 0))
2273
/* Start logging with a new file */
2274
close(LOG_CLOSE_INDEX);
2275
if ((error= my_delete_allow_opened(index_file_name, MYF(0)))) // Reset (open will update)
2277
if (my_errno == ENOENT)
2279
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2280
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2282
sql_print_information("Failed to delete file '%s'",
2289
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2290
ER_BINLOG_PURGE_FATAL_ERR,
2291
"a problem with deleting %s; "
2292
"consider examining correspondence "
2293
"of your binlog index file "
2294
"to the actual binlog files",
2300
if (!thd->slave_thread)
2302
/* Recreate the index file first, then the log itself under the saved name. */
if (!open_index_file(index_file_name, 0))
2303
open(save_name, log_type, 0, io_cache_type, no_auto_events, max_size, 0);
2304
my_free((uchar*) save_name, MYF(0));
2307
VOID(pthread_mutex_unlock(&LOCK_thread_count));
2308
pthread_mutex_unlock(&LOCK_index);
2309
pthread_mutex_unlock(&LOCK_log);
2315
Delete relay log files prior to rli->group_relay_log_name
2316
(i.e. all logs which are not involved in a non-finished group
2317
(transaction)), remove them from the index file and start on next
2321
- Protects index file with LOCK_index
2322
- Delete relevant relay log files
2323
- Copy all file names after these ones to the front of the index file
2324
- If the OS has truncate, truncate the file, else fill it with \n'
2325
- Read the next file name from the index file and store in rli->linfo
2327
@param rli Relay log information
2328
@param included If false, all relay logs that are strictly before
2329
rli->group_relay_log_name are deleted ; if true, the
2330
latter is deleted too (i.e. all relay logs
2331
read by the SQL slave thread are deleted).
2334
- This is only called from the slave-execute thread when it has read
2335
all commands from a relay log and want to switch to a new relay log.
2336
- When this happens, we can be in an active transaction as
2337
a transaction can span over two relay logs
2338
(although it is always written as a single block to the master's binary
2339
log, hence cannot span over two master's binary logs).
2344
LOG_INFO_EOF End of log-index-file found
2346
LOG_INFO_SEEK Could not allocate IO cache
2348
LOG_INFO_IO Got IO error while reading file
2351
#ifdef HAVE_REPLICATION
2353
/*
  Purge relay logs up to rli->group_relay_log_name and move rli on to the
  next relay log, all under LOCK_index.

  purge_logs() runs while rli->log_space_lock is held so that
  rli->log_space_total is updated consistently; log_space_cond is broadcast
  after that lock is released. The next log name is then looked up in the
  index file and rli's event (and, in the visible second assignment, group)
  coordinates are reset to BIN_LOG_HEADER_SIZE in that file before
  flush_relay_log_info() is called.
*/
int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
2356
DBUG_ENTER("purge_first_log");
2358
DBUG_ASSERT(is_open());
2359
DBUG_ASSERT(rli->slave_running == 1);
2360
DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
2362
pthread_mutex_lock(&LOCK_index);
2363
pthread_mutex_lock(&rli->log_space_lock);
2364
rli->relay_log.purge_logs(rli->group_relay_log_name, included,
2365
0, 0, &rli->log_space_total);
2366
// Tell the I/O thread to take the relay_log_space_limit into account
2367
rli->ignore_log_space_limit= 0;
2368
pthread_mutex_unlock(&rli->log_space_lock);
2371
Ok to broadcast after the critical region as there is no risk of
2372
the mutex being destroyed by this thread later - this helps save
2375
pthread_cond_broadcast(&rli->log_space_cond);
2378
Read the next log file name from the index file and pass it back to
2380
If included is true, we want the first relay log;
2381
otherwise we want the one after event_relay_log_name.
2383
if ((included && (error=find_log_pos(&rli->linfo, NullS, 0))) ||
2385
((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
2386
(error=find_next_log(&rli->linfo, 0)))))
2389
sql_print_error("next log error: %d offset: %s log: %s included: %d",
2391
llstr(rli->linfo.index_file_offset,buff),
2392
rli->group_relay_log_name,
2398
Reset rli's coordinates to the current log.
2400
rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
2401
strmake(rli->event_relay_log_name,rli->linfo.log_file_name,
2402
sizeof(rli->event_relay_log_name)-1);
2405
If we removed the rli->group_relay_log_name file,
2406
we must update the rli->group* coordinates, otherwise do not touch it as the
2407
group's execution is not finished (e.g. COMMIT not executed)
2411
rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
2412
strmake(rli->group_relay_log_name,rli->linfo.log_file_name,
2413
sizeof(rli->group_relay_log_name)-1);
2414
rli->notify_group_relay_log_name_update();
2417
/* Store where we are in the new file for the execution thread */
2418
flush_relay_log_info(rli);
2421
pthread_mutex_unlock(&LOCK_index);
2426
Update log index_file.
2429
/*
  Drop the first log_info->index_file_start_offset bytes of the index file
  by shifting the remainder up, and optionally adjust the index offsets
  held by running threads by the same amount.
*/
int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
2431
if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
2434
// now update offsets in index file for running threads
2435
if (need_update_threads)
2436
adjust_linfo_offsets(log_info->index_file_start_offset);
2441
Remove all logs before the given log from disk and from the index file.
2443
@param to_log Delete all log file name before this file.
2444
@param included If true, to_log is deleted too.
2446
@param need_update_threads If we want to update the log coordinates of
2447
all threads. False for relay logs, true otherwise.
2448
@param freed_log_space If not null, decrement this variable of
2449
the amount of log space freed
2452
If any of the logs before the deleted one is in use,
2453
only purge logs up to this one.
2458
LOG_INFO_EOF to_log not found
2459
LOG_INFO_EMFILE too many files opened
2460
LOG_INFO_FATAL if any other than ENOENT error from
2461
my_stat() or my_delete()
2464
/*
  Delete log files from the start of the index file up to to_log, then
  rewrite the index file, all under LOCK_index.

  The loop stops at to_log (after deleting it as well when 'included' is
  set) or at the first log that is still in use. For each file: my_stat()
  obtains its size, my_delete() removes it, *decrease_log_space is reduced
  by that size on success, and ha_binlog_index_purge_file() is notified.
  ENOENT from my_stat()/my_delete() only produces a warning; other errors
  set LOG_INFO_FATAL (LOG_INFO_EMFILE when my_delete() fails with EMFILE).
*/
int MYSQL_BIN_LOG::purge_logs(const char *to_log,
2467
bool need_update_threads,
2468
ulonglong *decrease_log_space)
2474
DBUG_ENTER("purge_logs");
2475
DBUG_PRINT("info",("to_log= %s",to_log));
2478
pthread_mutex_lock(&LOCK_index);
2479
/* First make sure to_log is actually listed in the index file. */
if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
2483
File name exists in index file; delete until we find this file
2484
or a file that is used.
2486
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
2488
/* exit_loop is assigned only when the current entry is to_log itself. */
while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
2489
!log_in_use(log_info.log_file_name))
2492
if (!my_stat(log_info.log_file_name, &s, MYF(0)))
2494
if (my_errno == ENOENT)
2497
It's not fatal if we can't stat a log file that does not exist;
2498
If we could not stat, we won't delete.
2500
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2501
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2502
log_info.log_file_name);
2503
sql_print_information("Failed to execute my_stat on file '%s'",
2504
log_info.log_file_name);
2510
Other than ENOENT are fatal
2512
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2513
ER_BINLOG_PURGE_FATAL_ERR,
2514
"a problem with getting info on being purged %s; "
2515
"consider examining correspondence "
2516
"of your binlog index file "
2517
"to the actual binlog files",
2518
log_info.log_file_name);
2519
error= LOG_INFO_FATAL;
2525
DBUG_PRINT("info",("purging %s",log_info.log_file_name));
2526
if (!my_delete(log_info.log_file_name, MYF(0)))
2528
if (decrease_log_space)
2529
*decrease_log_space-= s.st_size;
2533
if (my_errno == ENOENT)
2535
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2536
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2537
log_info.log_file_name);
2538
sql_print_information("Failed to delete file '%s'",
2539
log_info.log_file_name);
2544
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2545
ER_BINLOG_PURGE_FATAL_ERR,
2546
"a problem with deleting %s; "
2547
"consider examining correspondence "
2548
"of your binlog index file "
2549
"to the actual binlog files",
2550
log_info.log_file_name);
2551
if (my_errno == EMFILE)
2554
("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno));
2555
error= LOG_INFO_EMFILE;
2557
error= LOG_INFO_FATAL;
2563
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
2565
if (find_next_log(&log_info, 0) || exit_loop)
2570
If we get killed -9 here, the sysadmin would have to edit
2571
the log index file after restart - otherwise, this should be safe
2573
error= update_log_index(&log_info, need_update_threads);
2580
pthread_mutex_unlock(&LOCK_index);
2585
Remove all logs before the given file date from disk and from the
2588
@param thd Thread pointer
2589
@param before_date Delete all log files before given date.
2592
If any of the logs before the deleted one is in use,
2593
only purge logs up to this one.
2598
LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
2599
LOG_INFO_FATAL if any other than ENOENT error from
2600
my_stat() or my_delete()
2603
/*
  Delete log files from the start of the index file and then rewrite the
  index file, all under LOCK_index.

  The loop never touches the current log (log_file_name) nor a log that is
  still in use, and the modification time of each file is compared with
  purge_time (st_mtime >= purge_time is the stop test).
  ENOENT from my_stat()/my_delete() only produces a warning; other errors
  set LOG_INFO_FATAL. Threads' index offsets are always updated
  (update_log_index is called with 1).
*/
int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
2609
DBUG_ENTER("purge_logs_before_date");
2611
pthread_mutex_lock(&LOCK_index);
2614
Delete until we find curren file
2615
or a file that is used or a file
2616
that is older than purge_time.
2618
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
2621
while (strcmp(log_file_name, log_info.log_file_name) &&
2622
!log_in_use(log_info.log_file_name))
2624
if (!my_stat(log_info.log_file_name, &stat_area, MYF(0)))
2626
if (my_errno == ENOENT)
2629
It's not fatal if we can't stat a log file that does not exist.
2631
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2632
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2633
log_info.log_file_name);
2634
sql_print_information("Failed to execute my_stat on file '%s'",
2635
log_info.log_file_name);
2641
Other than ENOENT are fatal
2643
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2644
ER_BINLOG_PURGE_FATAL_ERR,
2645
"a problem with getting info on being purged %s; "
2646
"consider examining correspondence "
2647
"of your binlog index file "
2648
"to the actual binlog files",
2649
log_info.log_file_name);
2650
error= LOG_INFO_FATAL;
2656
/* Files modified at or after purge_time are not purged. */
if (stat_area.st_mtime >= purge_time)
2658
if (my_delete(log_info.log_file_name, MYF(0)))
2660
if (my_errno == ENOENT)
2662
/* It's not fatal even if we can't delete a log file */
2663
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2664
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2665
log_info.log_file_name);
2666
sql_print_information("Failed to delete file '%s'",
2667
log_info.log_file_name);
2672
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2673
ER_BINLOG_PURGE_FATAL_ERR,
2674
"a problem with deleting %s; "
2675
"consider examining correspondence "
2676
"of your binlog index file "
2677
"to the actual binlog files",
2678
log_info.log_file_name);
2679
error= LOG_INFO_FATAL;
2683
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
2685
if (find_next_log(&log_info, 0))
2690
If we get killed -9 here, the sysadmin would have to edit
2691
the log index file after restart - otherwise, this should be safe
2693
error= update_log_index(&log_info, 1);
2696
pthread_mutex_unlock(&LOCK_index);
2699
#endif /* HAVE_REPLICATION */
2703
Create a new log file name.
2705
@param buf buf of at least FN_REFLEN where new name is stored
2708
If file name will be longer than FN_REFLEN it will be truncated
2711
/*
  Build "<directory of the current log file><log_ident>" into buf.
  The directory part is capped at FN_REFLEN-1 bytes and the identifier is
  truncated so that the result, including its terminating NUL, fits in
  FN_REFLEN bytes.
*/
void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
2713
uint dir_len = dirname_length(log_file_name);
2714
if (dir_len >= FN_REFLEN)
2715
dir_len=FN_REFLEN-1;
2716
strnmov(buf, log_file_name, dir_len);
2717
strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
2722
Check if we are writing/reading to the given log file.
2725
bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
2727
return !strcmp(log_file_name, log_file_name_arg);
2732
Wrappers around new_file_impl to avoid using argument
2733
to control locking. The argument 1) less readable 2) breaks
2734
encapsulation 3) allows external access to the class without
2735
a lock (which is not possible with private new_file_without_locking
2739
/*
  Rotation wrapper around new_file_impl().
  NOTE(review): the body is not visible in this listing; presumably it
  calls new_file_impl() asking it to take the locks - confirm.
*/
void MYSQL_BIN_LOG::new_file()
2745
/*
  Rotation wrapper around new_file_impl() for callers that already hold
  the log mutexes.
  NOTE(review): the body is not visible in this listing; presumably it
  calls new_file_impl() without locking - confirm.
*/
void MYSQL_BIN_LOG::new_file_without_locking()
2752
Start writing to a new log file or reopen the old file.
2754
@param need_lock Set to 1 if caller has not locked LOCK_log
2757
The new file name is stored last in the index file
2760
/*
  Rotate to a new log file, or reopen the old one.

  Steps visible below, in order:
    - LOCK_log and LOCK_index are taken (or asserted to be owned);
    - the thread waits on COND_prep_xids until prepared_xids is zero;
    - a new name is produced with generate_new_name();
    - for LOG_BIN logs without no_auto_events a Rotate_log_event naming
      the new file is built and its size added to bytes_written;
    - the log is closed with LOG_CLOSE_TO_BE_OPENED and open() is called
      with the new name, after which old_name is freed;
    - LOCK_log and LOCK_index are released.

  NOTE(review): the conditions guarding several of these steps (the use
  of need_lock, the is_open() test, error paths) fall on lines that are
  missing from this listing - confirm against the pristine file.
*/
void MYSQL_BIN_LOG::new_file_impl(bool need_lock)
2762
char new_name[FN_REFLEN], *new_name_ptr, *old_name;
2764
DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
2767
DBUG_PRINT("info",("log is closed"));
2772
pthread_mutex_lock(&LOCK_log);
2773
pthread_mutex_lock(&LOCK_index);
2775
safe_mutex_assert_owner(&LOCK_log);
2776
safe_mutex_assert_owner(&LOCK_index);
2779
if binlog is used as tc log, be sure all xids are "unlogged",
2780
so that on recover we only need to scan one - latest - binlog file
2781
for prepared xids. As this is expected to be a rare event,
2782
simple wait strategy is enough. We're locking LOCK_log to be sure no
2783
new Xid_log_event's are added to the log (and prepared_xids is not
2784
increased), and waiting on COND_prep_xids for late threads to
2789
tc_log_page_waits++;
2790
pthread_mutex_lock(&LOCK_prep_xids);
2791
while (prepared_xids) {
2792
DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids));
2793
pthread_cond_wait(&COND_prep_xids, &LOCK_prep_xids);
2795
pthread_mutex_unlock(&LOCK_prep_xids);
2798
/* Reuse old name if not binlog and not update log */
2802
If user hasn't specified an extension, generate a new log name
2803
We have to do this here and not in open as we want to store the
2804
new file name in the current binary log file.
2806
if (generate_new_name(new_name, name))
2808
new_name_ptr=new_name;
2810
if (log_type == LOG_BIN)
2812
if (!no_auto_events)
2815
We log the whole file name for log file as the user may decide
2816
to change base names at some point.
2818
Rotate_log_event r(new_name+dirname_length(new_name),
2819
0, LOG_EVENT_OFFSET, 0);
2821
bytes_written += r.data_written;
2824
Update needs to be signalled even if there is no rotate event
2825
log rotation should give the waiting thread a signal to
2826
discover EOF and move on to the next log.
2831
name=0; // Don't free name
2832
close(LOG_CLOSE_TO_BE_OPENED);
2835
Note that at this point, log_state != LOG_CLOSED (important for is_open()).
2839
new_file() is only used for rotation (in FLUSH LOGS or because size >
2840
max_binlog_size or max_relay_log_size).
2841
If this is a binary log, the Format_description_log_event at the beginning of
2842
the new file should have created=0 (to distinguish with the
2843
Format_description_log_event written at server startup, which should
2844
trigger temp tables deletion on slaves.
2847
open(old_name, log_type, new_name_ptr,
2848
io_cache_type, no_auto_events, max_size, 1);
2849
my_free(old_name,MYF(0));
2853
pthread_mutex_unlock(&LOCK_log);
2854
pthread_mutex_unlock(&LOCK_index);
2860
/*
  Append one event to a log opened as SEQ_READ_APPEND.

  Takes LOCK_log itself, writes the event with ev->write(), adds
  ev->data_written to bytes_written, rotates through
  new_file_without_locking() once my_b_append_tell() exceeds max_size,
  then releases LOCK_log and calls signal_update().

  NOTE(review): the append position is cast to uint before it is compared
  with max_size. The error path and the return statement are on lines
  missing from this listing.
*/
bool MYSQL_BIN_LOG::append(Log_event* ev)
2863
pthread_mutex_lock(&LOCK_log);
2864
DBUG_ENTER("MYSQL_BIN_LOG::append");
2866
DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
2868
Log_event::write() is smart enough to use my_b_write() or
2869
my_b_append() depending on the kind of cache we have.
2871
if (ev->write(&log_file))
2876
bytes_written+= ev->data_written;
2877
DBUG_PRINT("info",("max_size: %lu",max_size));
2878
if ((uint) my_b_append_tell(&log_file) > max_size)
2879
new_file_without_locking();
2882
pthread_mutex_unlock(&LOCK_log);
2883
signal_update(); // Safe as we don't call close
2888
/*
  Append a variable number of (buffer, length) pairs to a log opened as
  SEQ_READ_APPEND.

  The caller must own LOCK_log (asserted below). Each pair is written
  with my_b_append() and its length added to bytes_written; the argument
  list ends at the first NULL buffer or zero length. The log is rotated
  through new_file_without_locking() once my_b_append_tell() exceeds
  max_size.

  NOTE(review): the va_list setup, the error path and the return
  statement are on lines missing from this listing.
*/
bool MYSQL_BIN_LOG::appendv(const char* buf, uint len,...)
2891
DBUG_ENTER("MYSQL_BIN_LOG::appendv");
2895
DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
2897
safe_mutex_assert_owner(&LOCK_log);
2900
if (my_b_append(&log_file,(uchar*) buf,len))
2905
bytes_written += len;
2906
} while ((buf=va_arg(args,const char*)) && (len=va_arg(args,uint)));
2907
DBUG_PRINT("info",("max_size: %lu",max_size));
2908
if ((uint) my_b_append_tell(&log_file) > max_size)
2909
new_file_without_locking();
2918
/*
  Flush the IO_CACHE of the log file and, every sync_binlog_period calls,
  sync the file descriptor with my_sync().

  The caller must own LOCK_log (asserted below). sync_binlog_counter is
  incremented on every call that gets past the flush, and is reset to 0
  when a sync is performed. A sync_binlog_period of 0 disables syncing.

  NOTE(review): the return statements are on lines missing from this
  listing.
*/
bool MYSQL_BIN_LOG::flush_and_sync()
2920
int err=0, fd=log_file.file;
2921
safe_mutex_assert_owner(&LOCK_log);
2922
if (flush_io_cache(&log_file))
2924
if (++sync_binlog_counter >= sync_binlog_period && sync_binlog_period)
2926
sync_binlog_counter= 0;
2927
err=my_sync(fd, MYF(MY_WME));
2932
/**
  Switch the connection into "union events" mode.

  @param thd             connection whose binlog_evt_union state is reset
  @param query_id_param  stored as the first query id of the union

  Must not be called while a union is already active (asserted).
*/
void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
{
  DBUG_ASSERT(!thd->binlog_evt_union.do_union);
  /* Start from a clean slate: nothing has been unioned yet. */
  thd->binlog_evt_union.first_query_id= query_id_param;
  thd->binlog_evt_union.unioned_events_trans= FALSE;
  thd->binlog_evt_union.unioned_events= FALSE;
  thd->binlog_evt_union.do_union= TRUE;
}
2941
/**
  Leave "union events" mode.

  @param thd  connection whose binlog_evt_union.do_union flag is cleared

  A union must be active when this is called (asserted).
*/
void MYSQL_BIN_LOG::stop_union_events(THD *thd)
{
  DBUG_ASSERT(thd->binlog_evt_union.do_union);

  thd->binlog_evt_union.do_union= FALSE;
}
2947
/**
  Tell whether a query belongs to the currently active union of events.

  @param thd             connection to inspect
  @param query_id_param  id of the query being tested

  @return true when a union is active and query_id_param is not older
          than the id recorded when the union was started.
*/
bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
{
  if (!thd->binlog_evt_union.do_union)
    return FALSE;
  return query_id_param >= thd->binlog_evt_union.first_query_id;
}
2955
These functions are placed in this file since they need access to
2956
binlog_hton, which has internal linkage.
2959
/*
  Make sure this connection has a binlog_trx_data attached to binlog_hton.

  If none is attached yet, one is allocated zero-filled with my_malloc(),
  its trans_log cache is opened with open_cached_file() and it is
  registered with thd_set_ha_data(); the object is then constructed in
  place with placement new.

  Returns 0 when the data already existed ("Already set up") and 1 when
  allocation or opening the cache file failed.

  NOTE(review): the if-conditions and the final return are on lines
  missing from this listing.
*/
int THD::binlog_setup_trx_data()
2961
DBUG_ENTER("THD::binlog_setup_trx_data");
2962
binlog_trx_data *trx_data=
2963
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2966
DBUG_RETURN(0); // Already set up
2968
trx_data= (binlog_trx_data*) my_malloc(sizeof(binlog_trx_data), MYF(MY_ZEROFILL));
2970
open_cached_file(&trx_data->trans_log, mysql_tmpdir,
2971
LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
2973
my_free((uchar*)trx_data, MYF(MY_ALLOW_ZERO_PTR));
2974
DBUG_RETURN(1); // Didn't manage to set it up
2976
thd_set_ha_data(this, binlog_hton, trx_data);
2978
trx_data= new (thd_get_ha_data(this, binlog_hton)) binlog_trx_data;
2984
Function to start a statement and optionally a transaction for the
2988
binlog_start_trans_and_stmt()
2992
This function does three things:
2993
- Start a transaction if not in autocommit mode or if a BEGIN
2994
statement has been seen.
2996
- Start a statement transaction to allow us to truncate the binary
2999
- Save the current binlog position so that we can roll back the
3000
statement by truncating the transaction log.
3002
We only update the saved position if the old one was undefined,
3003
the reason is that there are some cases (e.g., for CREATE-SELECT)
3004
where the position is saved twice (e.g., both in
3005
select_create::prepare() and THD::binlog_write_table_map()) , but
3006
we should use the first. This means that calls to this function
3007
can be used to start the statement before the first table map
3008
event, to include some extra events.
3012
/*
  Start a binlog statement and, when OPTION_NOT_AUTOCOMMIT or OPTION_BEGIN
  is set in options, a binlog transaction as well.

  Work is only done when there is no trx_data yet or its before_stmt_pos
  is still MY_OFF_T_UNDEF: the statement start is recorded through
  binlog_set_stmt_begin(), binlog_hton is registered with
  trans_register_ha() and the statement-level ha_info is marked
  read/write.

  NOTE(review): the return type and the braces are on lines missing from
  this listing.
*/
THD::binlog_start_trans_and_stmt()
3014
binlog_trx_data *trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3015
DBUG_ENTER("binlog_start_trans_and_stmt");
3016
DBUG_PRINT("enter", ("trx_data: 0x%lx trx_data->before_stmt_pos: %lu",
3018
(trx_data ? (ulong) trx_data->before_stmt_pos :
3021
if (trx_data == NULL ||
3022
trx_data->before_stmt_pos == MY_OFF_T_UNDEF)
3024
this->binlog_set_stmt_begin();
3025
if (options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
3026
trans_register_ha(this, TRUE, binlog_hton);
3027
trans_register_ha(this, FALSE, binlog_hton);
3029
Mark statement transaction as read/write. We never start
3030
a binary log transaction and keep it read-only,
3031
therefore it's best to mark the transaction read/write just
3032
at the same time we start it.
3033
Not necessary to mark the normal transaction read/write
3034
since the statement-level flag will be propagated automatically
3035
inside ha_commit_trans.
3037
ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
3042
void THD::binlog_set_stmt_begin() {
3043
binlog_trx_data *trx_data=
3044
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3047
The call to binlog_trans_log_savepos() might create the trx_data
3048
structure, if it didn't exist before, so we save the position
3049
into an auto variable and then write it into the transaction
3050
data for the binary log (i.e., trx_data).
3053
binlog_trans_log_savepos(this, &pos);
3054
trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3055
trx_data->before_stmt_pos= pos;
3060
Write a table map to the binary log.
3063
/*
  Write a Table_map_log_event for the given table to the binary log.

  Preconditions (asserted): row-based logging is active for the current
  statement, mysql_bin_log is open and the table has a valid table_map_id.
  For a transactional write that is the first table map of the statement
  (binlog_table_maps == 0), binlog_start_trans_and_stmt() is called first.
  After a successful write binlog_table_maps is incremented and the
  table share records mysql_bin_log.table_map_version().

  NOTE(review): the declarations of error and the_event as well as the
  return statements are on lines missing from this listing.
*/
int THD::binlog_write_table_map(TABLE *table, bool is_trans)
3066
DBUG_ENTER("THD::binlog_write_table_map");
3067
DBUG_PRINT("enter", ("table: 0x%lx (%s: #%lu)",
3068
(long) table, table->s->table_name.str,
3069
table->s->table_map_id));
3071
/* Pre-conditions */
3072
DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open());
3073
DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
3075
Table_map_log_event::flag_set const
3076
flags= Table_map_log_event::TM_NO_FLAGS;
3079
the_event(this, table, table->s->table_map_id, is_trans, flags);
3081
if (is_trans && binlog_table_maps == 0)
3082
binlog_start_trans_and_stmt();
3084
if ((error= mysql_bin_log.write(&the_event)))
3087
binlog_table_maps++;
3088
table->s->table_map_version= mysql_bin_log.table_map_version();
3093
THD::binlog_get_pending_rows_event() const
3095
binlog_trx_data *const trx_data=
3096
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3098
This is less than ideal, but here's the story: If there is no
3099
trx_data, prepare_pending_rows_event() has never been called
3100
(since the trx_data is set up there). In that case, we just return
3103
return trx_data ? trx_data->pending() : NULL;
3107
THD::binlog_set_pending_rows_event(Rows_log_event* ev)
3109
if (thd_get_ha_data(this, binlog_hton) == NULL)
3110
binlog_setup_trx_data();
3112
binlog_trx_data *const trx_data=
3113
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3115
DBUG_ASSERT(trx_data);
3116
trx_data->set_pending(ev);
3121
Moves the last bunch of rows from the pending Rows event to the binlog
3122
(either cached binlog if transaction, or disk binlog). Sets a new pending
3126
/*
  Write out the pending rows event of thd, if there is one, and install
  event as the new pending event.

  The pending event goes to the transaction cache (trx_data->trans_log)
  when it asks for caching or when the cache is not empty, otherwise
  straight to log_file. The write happens under LOCK_log. When the event
  carries STMT_END_F, m_table_map_version is stepped. After a direct
  write the log is flushed with flush_and_sync() and rotate_and_purge()
  is called with LOCK_log already held.

  NOTE(review): the return type, the error variable and the return
  statements are on lines missing from this listing.
*/
MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
3127
Rows_log_event* event)
3129
DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
3130
DBUG_ASSERT(mysql_bin_log.is_open());
3131
DBUG_PRINT("enter", ("event: 0x%lx", (long) event));
3135
binlog_trx_data *const trx_data=
3136
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
3138
DBUG_ASSERT(trx_data);
3140
DBUG_PRINT("info", ("trx_data->pending(): 0x%lx", (long) trx_data->pending()));
3142
if (Rows_log_event* pending= trx_data->pending())
3144
IO_CACHE *file= &log_file;
3147
Decide if we should write to the log file directly or to the
3150
if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log))
3151
file= &trx_data->trans_log;
3154
If we are writing to the log file directly, we could avoid
3155
locking the log. This does not work since we need to step the
3156
m_table_map_version below, and that change has to be protected
3157
by the LOCK_log mutex.
3159
pthread_mutex_lock(&LOCK_log);
3162
Write pending event to log file or transaction cache
3164
if (pending->write(file))
3166
pthread_mutex_unlock(&LOCK_log);
3171
We step the table map version if we are writing an event
3172
representing the end of a statement. We do this regardless of
3173
wheather we write to the transaction cache or to directly to the
3176
In an ideal world, we could avoid stepping the table map version
3177
if we were writing to a transaction cache, since we could then
3178
reuse the table map that was written earlier in the transaction
3179
cache. This does not work since STMT_END_F implies closing all
3180
table mappings on the slave side.
3182
TODO: Find a solution so that table maps does not have to be
3183
written several times within a transaction.
3185
if (pending->get_flags(Rows_log_event::STMT_END_F))
3186
++m_table_map_version;
3190
if (file == &log_file)
3192
error= flush_and_sync();
3196
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3200
pthread_mutex_unlock(&LOCK_log);
3203
thd->binlog_set_pending_rows_event(event);
3209
Write an event to the binary log.
3212
/*
  Write a single event to the binary log or to the transaction cache.

  Outline of the visible code:
    - inside a union of events (thd->binlog_evt_union.do_union) the event
      is only recorded in the union flags;
    - any pending rows event is flushed before LOCK_log is taken;
    - under LOCK_log, if the log is open, the event is dropped when
      OPTION_BIN_LOG is off for the connection or binlog_filter rejects
      the database;
    - with transactions in use the target becomes the trans_log cache
      when the event asks for it or the cache is not empty;
    - for statement-based logging the context events (Intvar, Rand,
      User_var) are written ahead of the event itself;
    - after a direct write to log_file the log is flushed with
      flush_and_sync() and rotate_and_purge() is called;
    - write errors are reported as ER_TRANS_CACHE_FULL when my_errno is
      EFBIG and as ER_ERROR_ON_WRITE otherwise;
    - m_table_map_version is stepped when the event carries
      LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F.

  NOTE(review): thd is dereferenced by the do_union test before the later
  tests that allow thd to be NULL. Braces, goto targets and return
  statements are on lines missing from this listing.
*/
bool MYSQL_BIN_LOG::write(Log_event *event_info)
3214
THD *thd= event_info->thd;
3216
DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)");
3218
if (thd->binlog_evt_union.do_union)
3221
In Stored function; Remember that function call caused an update.
3222
We will log the function call to the binary log on function exit
3224
thd->binlog_evt_union.unioned_events= TRUE;
3225
thd->binlog_evt_union.unioned_events_trans |= event_info->cache_stmt;
3230
Flush the pending rows event to the transaction cache or to the
3231
log file. Since this function potentially aquire the LOCK_log
3232
mutex, we do this before aquiring the LOCK_log mutex in this
3235
We only end the statement if we are in a top-level statement. If
3236
we are inside a stored function, we do not end the statement since
3237
this will close all tables on the slave.
3239
bool const end_stmt= false;
3240
thd->binlog_flush_pending_rows_event(end_stmt);
3242
pthread_mutex_lock(&LOCK_log);
3245
In most cases this is only called if 'is_open()' is true; in fact this is
3246
mostly called if is_open() *was* true a few instructions before, but it
3247
could have changed since.
3249
if (likely(is_open()))
3251
IO_CACHE *file= &log_file;
3253
In the future we need to add to the following if tests like
3254
"do the involved tables match (to be implemented)
3255
binlog_[wild_]{do|ignore}_table?" (WL#1049)"
3257
const char *local_db= event_info->get_db();
3258
if ((thd && !(thd->options & OPTION_BIN_LOG)) ||
3259
(!binlog_filter->db_ok(local_db)))
3261
VOID(pthread_mutex_unlock(&LOCK_log));
3266
Should we write to the binlog cache or to the binlog on disk?
3267
Write to the binlog cache if:
3268
- it is already not empty (meaning we're in a transaction; note that the
3269
present event could be about a non-transactional table, but still we need
3270
to write to the binlog cache in that case to handle updates to mixed
3271
trans/non-trans table types the best possible in binlogging)
3272
- or if the event asks for it (cache_stmt == TRUE).
3274
if (opt_using_transactions && thd)
3276
if (thd->binlog_setup_trx_data())
3279
binlog_trx_data *const trx_data=
3280
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
3281
IO_CACHE *trans_log= &trx_data->trans_log;
3282
my_off_t trans_log_pos= my_b_tell(trans_log);
3283
if (event_info->get_cache_stmt() || trans_log_pos != 0)
3285
DBUG_PRINT("info", ("Using trans_log: cache: %d, trans_log_pos: %lu",
3286
event_info->get_cache_stmt(),
3287
(ulong) trans_log_pos));
3288
if (trans_log_pos == 0)
3289
thd->binlog_start_trans_and_stmt();
3293
TODO as Mats suggested, for all the cases above where we write to
3294
trans_log, it sounds unnecessary to lock LOCK_log. We should rather
3295
test first if we want to write to trans_log, and if not, lock
3299
DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));
3302
No check for auto events flag here - this write method should
3303
never be called if auto-events are enabled
3307
1. Write first log events which describe the 'run environment'
3312
If row-based binlogging, Insert_id, Rand and other kind of "setting
3313
context" events are not needed.
3317
if (!thd->current_stmt_binlog_row_based)
3319
if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
3321
Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
3322
thd->first_successful_insert_id_in_prev_stmt_for_binlog);
3326
if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
3328
DBUG_PRINT("info",("number of auto_inc intervals: %u",
3329
thd->auto_inc_intervals_in_cur_stmt_for_binlog.
3332
If the auto_increment was second in a table's index (possible with
3333
MyISAM or BDB) (table->next_number_keypart != 0), such event is
3334
in fact not necessary. We could avoid logging it.
3336
Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
3337
thd->auto_inc_intervals_in_cur_stmt_for_binlog.
3344
Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
3348
if (thd->user_var_events.elements)
3350
for (uint i= 0; i < thd->user_var_events.elements; i++)
3352
BINLOG_USER_VAR_EVENT *user_var_event;
3353
get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);
3354
User_var_log_event e(thd, user_var_event->user_var_event->name.str,
3355
user_var_event->user_var_event->name.length,
3356
user_var_event->value,
3357
user_var_event->length,
3358
user_var_event->type,
3359
user_var_event->charset_number);
3368
Write the SQL command
3371
if (event_info->write(file))
3374
if (file == &log_file) // we are writing to the real log (disk)
3376
if (flush_and_sync())
3379
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3386
if (my_errno == EFBIG)
3387
my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(0));
3389
my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
3394
if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
3395
++m_table_map_version;
3397
pthread_mutex_unlock(&LOCK_log);
3402
/*
  Forward a printf-style message to logger.error_log_print() and return
  its result.
  NOTE(review): the line declaring the args parameter is missing from
  this listing.
*/
int error_log_print(enum loglevel level, const char *format,
3405
return logger.error_log_print(level, format, args);
3409
bool slow_log_print(THD *thd, const char *query, uint query_length,
3410
ulonglong current_utime)
3412
return logger.slow_log_print(thd, query, query_length, current_utime);
3416
/*
  Decide whether a server command should be written to the general log.

  The visible tests require at least one general log handler, the bit for
  command to be set in what_to_log, and then look at OPTION_LOG_OFF in
  thd->options.
  NOTE(review): the return statements are on lines missing from this
  listing, so the exact outcome of each test cannot be read off here.
*/
bool LOGGER::log_command(THD *thd, enum enum_server_command command)
3419
Log command if we have at least one log event handler enabled and want
3420
to log this king of commands
3422
if (*general_log_handler_list && (what_to_log & (1L << (uint) command)))
3424
if (thd->options & OPTION_LOG_OFF)
3437
/*
  Format a printf-style message and write it to the general log, provided
  logger.log_command() accepts the command.
  NOTE(review): the local declarations, va_end and the return statements
  are on lines missing from this listing.
*/
bool general_log_print(THD *thd, enum enum_server_command command,
3438
const char *format, ...)
3443
/* Print the message to the buffer if we want to log this kind of commands */
3444
if (! logger.log_command(thd, command))
3447
va_start(args, format);
3448
error= logger.general_log_print(thd, command, format, args);
3454
/*
  Write a query to the general log when logger.log_command() accepts the
  command; in that case the result of logger.general_log_write() is
  returned.
  NOTE(review): the return taken when the command is not logged is on a
  line missing from this listing.
*/
bool general_log_write(THD *thd, enum enum_server_command command,
3455
const char *query, uint query_length)
3457
/* Write the message to the log if we want to log this kind of commands */
3458
if (logger.log_command(thd, command))
3459
return logger.general_log_write(thd, command, query, query_length);
3464
/**
  Rotate the log when asked to or when it has reached max_size, and purge
  expired logs afterwards.

  @param flags  bitmask:
                - RP_LOCK_LOG_IS_ALREADY_LOCKED : caller holds LOCK_log,
                  so it is neither taken nor released here
                - RP_FORCE_ROTATE : rotate regardless of the file size

  Purging only happens right after a rotation, when built with
  HAVE_REPLICATION and expire_logs_days is non-zero.
*/
void MYSQL_BIN_LOG::rotate_and_purge(uint flags)
{
  const bool take_lock= !(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED);

  if (take_lock)
    pthread_mutex_lock(&LOCK_log);

  /* The size is only looked at when rotation is not forced. */
  bool must_rotate= (flags & RP_FORCE_ROTATE) != 0;
  if (!must_rotate)
    must_rotate= my_b_tell(&log_file) >= (my_off_t) max_size;

  if (must_rotate)
  {
    new_file_without_locking();
#ifdef HAVE_REPLICATION
    if (expire_logs_days)
    {
      time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
      if (purge_time >= 0)
        purge_logs_before_date(purge_time);
    }
#endif
  }

  if (take_lock)
    pthread_mutex_unlock(&LOCK_log);
}
3485
/*
  Return a file id; the work is done while holding LOCK_log.
  NOTE(review): the statement executed between lock and unlock and the
  return statement are on lines missing from this listing.
*/
uint MYSQL_BIN_LOG::next_file_id()
3488
pthread_mutex_lock(&LOCK_log);
3490
pthread_mutex_unlock(&LOCK_log);
3496
Write the contents of a cache to the binary log.
3500
cache Cache to write to the binary log
3501
lock_log True if the LOCK_log mutex should be acquired, false otherwise
3502
sync_log True if the log should be flushed and sync:ed
3505
Write the contents of the cache to the binary log. The cache will
3506
be reset as a READ_CACHE to be able to read the contents from it.
3509
/*
  Copy the contents of cache to log_file, fixing the end_log_pos field of
  every event header on the way.

  cache is re-initialised as a READ_CACHE and consumed segment by segment
  with my_b_fill(). group holds the position of log_file before the copy
  and is added to each end_log_pos. hdr_offs is the offset of the next
  event header within the current segment. carry is the number of header
  bytes saved from the previous segment when a header straddles two
  segments; the split header is patched in the local header buffer, its
  first part written from there and its second part copied back into the
  cache so that the normal segment write emits it.

  Returns ER_ERROR_ON_WRITE when re-initialising the cache or writing to
  log_file fails. LOCK_log is taken through the Mutex_sentry when
  lock_log is true.

  NOTE(review): group is obtained by casting my_b_tell() to uint. The
  initialisation of carry and hdr_offs, the adjustment of hdr_offs after
  each segment and the final return are on lines missing from this
  listing.
*/
int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
3511
Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
3513
if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
3514
return ER_ERROR_ON_WRITE;
3515
uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
3517
uchar header[LOG_EVENT_HEADER_LEN];
3520
The events in the buffer have incorrect end_log_pos data
3521
(relative to beginning of group rather than absolute),
3522
so we'll recalculate them in situ so the binlog is always
3523
correct, even in the middle of a group. This is possible
3524
because we now know the start position of the group (the
3525
offset of this cache in the log, if you will); all we need
3526
to do is to find all event-headers, and add the position of
3527
the group to the end_log_pos of each event. This is pretty
3528
straight forward, except that we read the cache in segments,
3529
so an event-header might end up on the cache-border and get
3533
group= (uint)my_b_tell(&log_file);
3540
if we only got a partial header in the last iteration,
3541
get the other half now and process a full header.
3543
if (unlikely(carry > 0))
3545
DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
3547
/* assemble both halves */
3548
memcpy(&header[carry], (char *)cache->read_pos, LOG_EVENT_HEADER_LEN - carry);
3550
/* fix end_log_pos */
3551
val= uint4korr(&header[LOG_POS_OFFSET]) + group;
3552
int4store(&header[LOG_POS_OFFSET], val);
3554
/* write the first half of the split header */
3555
if (my_b_write(&log_file, header, carry))
3556
return ER_ERROR_ON_WRITE;
3559
copy fixed second half of header to cache so the correct
3560
version will be written later.
3562
memcpy((char *)cache->read_pos, &header[carry], LOG_EVENT_HEADER_LEN - carry);
3564
/* next event header at ... */
3565
hdr_offs = uint4korr(&header[EVENT_LEN_OFFSET]) - carry;
3570
/* if there is anything to write, process it. */
3572
if (likely(length > 0))
3575
process all event-headers in this (partial) cache.
3576
if next header is beyond current read-buffer,
3577
we'll get it later (though not necessarily in the
3578
very next iteration, just "eventually").
3581
while (hdr_offs < length)
3584
partial header only? save what we can get, process once
3588
if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
3590
carry= length - hdr_offs;
3591
memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
3596
/* we've got a full event-header, and it came in one piece */
3598
uchar *log_pos= (uchar *)cache->read_pos + hdr_offs + LOG_POS_OFFSET;
3600
/* fix end_log_pos */
3601
val= uint4korr(log_pos) + group;
3602
int4store(log_pos, val);
3604
/* next event header at ... */
3605
log_pos= (uchar *)cache->read_pos + hdr_offs + EVENT_LEN_OFFSET;
3606
hdr_offs += uint4korr(log_pos);
3612
Adjust hdr_offs. Note that it may still point beyond the segment
3613
read in the next iteration; if the current event is very long,
3614
it may take a couple of read-iterations (and subsequent adjustments
3615
of hdr_offs) for it to point into the then-current segment.
3616
If we have a split header (!carry), hdr_offs will be set at the
3617
beginning of the next iteration, overwriting the value we set here:
3622
/* Write data to the binary log file */
3623
if (my_b_write(&log_file, cache->read_pos, length))
3624
return ER_ERROR_ON_WRITE;
3625
cache->read_pos=cache->read_end; // Mark buffer used up
3626
} while ((length= my_b_fill(cache)));
3628
DBUG_ASSERT(carry == 0);
3637
Write a cached log entry to the binary log.
3638
- To support transaction over replication, we wrap the transaction
3639
with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
3640
We want to write a BEGIN/ROLLBACK block when a non-transactional table
3641
was updated in a transaction which was rolled back. This is to ensure
3642
that the same updates are run on the slave.
3645
@param cache The cache to copy to the binlog
3646
@param commit_event The commit event to print after writing the
3647
contents of the cache.
3650
We only come here if there is something in the cache.
3652
The thing in the cache is always a complete transaction.
3654
'cache' needs to be reinitialized after this functions returns.
3657
/*
  Write a complete cached transaction to the binary log.

  Under LOCK_log, and only when the cache is not empty, the visible code
  writes a "BEGIN" Query_log_event with error_code forced to 0, copies
  the cache with write_cache(), writes commit_event and calls
  flush_and_sync(). A read error recorded in cache->error is reported
  with ER_ERROR_ON_READ. For an XID_EVENT commit LOCK_prep_xids is taken;
  otherwise rotate_and_purge() is called with LOCK_log already held.
  On the error path ER_ERROR_ON_WRITE is printed and LOCK_log released.

  NOTE(review): commit_event is asserted to be non-NULL and then tested
  for NULL again further down. The goto targets, the prepared_xids update
  and the return statements are on lines missing from this listing.
*/
bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
3659
DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)");
3660
VOID(pthread_mutex_lock(&LOCK_log));
3662
/* NULL would represent nothing to replicate after ROLLBACK */
3663
DBUG_ASSERT(commit_event != NULL);
3665
DBUG_ASSERT(is_open());
3666
if (likely(is_open())) // Should always be true
3669
We only bother to write to the binary log if there is anything
3672
if (my_b_tell(cache) > 0)
3675
Log "BEGIN" at the beginning of every transaction. Here, a
3676
transaction is either a BEGIN..COMMIT block or a single
3677
statement in autocommit mode.
3679
Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE);
3681
Imagine this is rollback due to net timeout, after all
3682
statements of the transaction succeeded. Then we want a
3683
zero-error code in BEGIN. In other words, if there was a
3684
really serious error code it's already in the statement's
3685
events, there is no need to put it also in this internally
3686
generated event, and as this event is generated late it would
3687
lead to false alarms.
3689
This is safer than thd->clear_error() against kills at shutdown.
3691
qinfo.error_code= 0;
3693
Now this Query_log_event has artificial log_pos 0. It must be
3694
adjusted to reflect the real position in the log. Not doing it
3695
would confuse the slave: it would prevent this one from
3696
knowing where he is in the master's binlog, which would result
3697
in wrong positions being shown to the user, MASTER_POS_WAIT
3700
if (qinfo.write(&log_file))
3703
DBUG_EXECUTE_IF("crash_before_writing_xid",
3705
if ((write_error= write_cache(cache, false, true)))
3706
DBUG_PRINT("info", ("error writing binlog cache: %d",
3708
DBUG_PRINT("info", ("crashing before writing xid"));
3712
if ((write_error= write_cache(cache, false, false)))
3715
if (commit_event && commit_event->write(&log_file))
3717
if (flush_and_sync())
3719
DBUG_EXECUTE_IF("half_binlogged_transaction", abort(););
3720
if (cache->error) // Error on read
3722
sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
3723
write_error=1; // Don't give more errors
3730
if commit_event is Xid_log_event, increase the number of
3731
prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated
3732
if there're prepared xids in it - see the comment in new_file() for
3734
If the commit_event is not Xid_log_event (then it's a Query_log_event)
3735
rotate binlog, if necessary.
3737
if (commit_event && commit_event->get_type_code() == XID_EVENT)
3739
pthread_mutex_lock(&LOCK_prep_xids);
3741
pthread_mutex_unlock(&LOCK_prep_xids);
3744
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3746
VOID(pthread_mutex_unlock(&LOCK_log));
3754
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
3756
VOID(pthread_mutex_unlock(&LOCK_log));
3762
Wait until we get a signal that the relay log has been updated
3764
@param[in] thd a THD struct
3766
LOCK_log must be taken before calling this function.
3767
It will be released at the end of the function.
3770
void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
3772
const char *old_msg;
3773
DBUG_ENTER("wait_for_update_relay_log");
3774
old_msg= thd->enter_cond(&update_cond, &LOCK_log,
3775
"Slave has read all relay log; "
3776
"waiting for the slave I/O "
3777
"thread to update it" );
3778
pthread_cond_wait(&update_cond, &LOCK_log);
3779
thd->exit_cond(old_msg);
3785
Wait until we get a signal that the binary log has been updated.
3786
Applies to master only.
3789
@param[in] thd a THD struct
3790
@param[in] timeout a pointer to a timespec;
3791
NULL means to wait w/o timeout.
3792
@retval 0 if got signalled on update
3793
@retval non-0 if wait timeout elapsed
3795
LOCK_log must be taken before calling this function.
3796
LOCK_log is being released while the thread is waiting.
3797
LOCK_log is released by the caller.
3800
/*
  Wait on update_cond (with LOCK_log as its mutex) until the binary log
  is updated, either without a time limit through pthread_cond_wait() or
  with one through pthread_cond_timedwait().

  thd->enter_cond() publishes the wait message; no matching exit_cond()
  call is visible in this function.

  NOTE(review): old_msg is initialised from thd->proc_info and then
  immediately overwritten by the result of enter_cond(). The test on
  timeout, the declaration of ret and the return statement are on lines
  missing from this listing.
*/
int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
3801
const struct timespec *timeout)
3804
const char* old_msg = thd->proc_info;
3805
DBUG_ENTER("wait_for_update_bin_log");
3806
old_msg= thd->enter_cond(&update_cond, &LOCK_log,
3807
"Master has sent all binlog to slave; "
3808
"waiting for binlog to be updated");
3810
pthread_cond_wait(&update_cond, &LOCK_log);
3812
ret= pthread_cond_timedwait(&update_cond, &LOCK_log,
3813
const_cast<struct timespec *>(timeout));
3821
@param exiting Bitmask for one or more of the following bits:
3822
- LOG_CLOSE_INDEX : if we should close the index file
3823
- LOG_CLOSE_TO_BE_OPENED : if we intend to call open
3824
at once after close.
3825
- LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
3828
One can do an open on the object at once after doing a close.
3829
The internal structures are not freed until cleanup() is called
3832
/*
  Close the binary log and, when LOG_CLOSE_INDEX is set, the index file.

  For an opened log the visible code:
    - handles a stop event for LOG_BIN logs when LOG_CLOSE_STOP_EVENT is
      set and auto events are enabled (HAVE_REPLICATION builds only);
    - for a WRITE_CACHE binary log, clears the flags byte at
      BIN_LOG_HEADER_SIZE + FLAGS_OFFSET with my_pwrite() and then seeks
      back to the original file position;
    - delegates the actual close to MYSQL_LOG::close().
  The index file is closed independently of the log state, and log_state
  becomes LOG_TO_BE_OPENED or LOG_CLOSED depending on
  LOG_CLOSE_TO_BE_OPENED.

  NOTE(review): the return values of my_pwrite() and my_seek() are not
  checked. Braces and a few short statements are on lines missing from
  this listing.
*/
void MYSQL_BIN_LOG::close(uint exiting)
3833
{ // One can't set log_type here!
3834
DBUG_ENTER("MYSQL_BIN_LOG::close");
3835
DBUG_PRINT("enter",("exiting: %d", (int) exiting));
3836
if (log_state == LOG_OPENED)
3838
#ifdef HAVE_REPLICATION
3839
if (log_type == LOG_BIN && !no_auto_events &&
3840
(exiting & LOG_CLOSE_STOP_EVENT))
3844
bytes_written+= s.data_written;
3847
#endif /* HAVE_REPLICATION */
3849
/* don't pwrite in a file opened with O_APPEND - it doesn't work */
3850
if (log_file.type == WRITE_CACHE && log_type == LOG_BIN)
3852
my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
3853
my_off_t org_position= my_tell(log_file.file, MYF(0));
3854
uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
3855
my_pwrite(log_file.file, &flags, 1, offset, MYF(0));
3857
Restore position so that anything we have in the IO_cache is written
3858
to the correct position.
3859
We need the seek here, as my_pwrite() is not guaranteed to keep the
3860
original position on system that doesn't support pwrite().
3862
my_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
3865
/* this will cleanup IO_CACHE, sync and close the file */
3866
MYSQL_LOG::close(exiting);
3870
The following test is needed even if is_open() is not set, as we may have
3871
called a not complete close earlier and the index file is still open.
3874
if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
3876
end_io_cache(&index_file);
3877
if (my_close(index_file.file, MYF(0)) < 0 && ! write_error)
3880
sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name, errno);
3883
log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
3889
/*
  Change max_size while holding LOCK_log, so that the update cannot
  interleave with a rotation that passes the old value to open().
  NOTE(review): a short line between the lock and the assignment is
  missing from this listing, so the assignment may be conditional.
*/
void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
3892
We need to take locks, otherwise this may happen:
3893
new_file() is called, calls open(old_max_size), then before open() starts,
3894
set_max_size() sets max_size to max_size_arg, then open() starts and
3895
uses the old_max_size argument, so max_size_arg has been overwritten and
3896
it's like if the SET command was never run.
3898
DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
3899
pthread_mutex_lock(&LOCK_log);
3901
max_size= max_size_arg;
3902
pthread_mutex_unlock(&LOCK_log);
3908
Check if a string is a valid number.
3910
@param str String to test
3911
@param res Store value here
3912
@param allow_wildcards Set to 1 if we should ignore '%' and '_'
3915
For the moment the allow_wildcards argument is not used
3916
Should be moved to some other file.
3919
1 String is a number
3924
/*
  Check whether str looks like a number.

  The visible scan skips leading spaces, looks for a '-' or '+' sign,
  then walks over digits (and over wild_many / wild_one when
  allow_wildcards is set), with a second walk of the same kind further
  down. The string is rejected when characters remain after the scan or
  when flag is still 0; otherwise 1 is returned.

  NOTE(review): the declarations of flag and start, the handling of the
  decimal point, the rejection return and the store into *res are on
  lines missing from this listing.
*/
static bool test_if_number(register const char *str,
3925
long *res, bool allow_wildcards)
3929
DBUG_ENTER("test_if_number");
3933
while (*str++ == ' ') ;
3934
if (*--str == '-' || *str == '+')
3936
while (my_isdigit(files_charset_info,*str) ||
3937
(allow_wildcards && (*str == wild_many || *str == wild_one)))
3945
my_isdigit(files_charset_info,*str) ||
3946
(allow_wildcards && (*str == wild_many || *str == wild_one)) ;
3949
if (*str != 0 || flag == 0)
3953
DBUG_RETURN(1); /* Number ok */
3954
} /* test_if_number */
3957
/*
  Print message followed by the text for the current errno to the error
  log; with HAVE_STRERROR this goes through sql_print_error().
  NOTE(review): the alternative branch for builds without strerror is on
  lines missing from this listing.
*/
void sql_perror(const char *message)
3959
#ifdef HAVE_STRERROR
3960
sql_print_error("%s: %s",message, strerror(errno));
3967
bool flush_error_log()
3972
char err_renamed[FN_REFLEN], *end;
3973
end= strmake(err_renamed,log_error_file,FN_REFLEN-4);
3974
strmov(end, "-old");
3975
VOID(pthread_mutex_lock(&LOCK_error_log));
3976
char err_temp[FN_REFLEN+4];
3978
On Windows a temporary file is necessary in order to rename
3979
the current error file.
3981
strxmov(err_temp, err_renamed,"-tmp",NullS);
3982
(void) my_delete(err_temp, MYF(0));
3983
if (freopen(err_temp,"a+",stdout))
3989
freopen(err_temp,"a+",stderr);
3990
(void) my_delete(err_renamed, MYF(0));
3991
my_rename(log_error_file,err_renamed,MYF(0));
3992
if (freopen(log_error_file,"a+",stdout))
3993
freopen(log_error_file,"a+",stderr);
3995
if ((fd = my_open(err_temp, O_RDONLY, MYF(0))) >= 0)
3997
while ((bytes= my_read(fd, buf, IO_SIZE, MYF(0))) &&
3998
bytes != MY_FILE_ERROR)
3999
my_fwrite(stderr, buf, bytes, MYF(0));
4000
my_close(fd, MYF(0));
4002
(void) my_delete(err_temp, MYF(0));
4006
VOID(pthread_mutex_unlock(&LOCK_error_log));
4011
void MYSQL_BIN_LOG::signal_update()
4013
DBUG_ENTER("MYSQL_BIN_LOG::signal_update");
4014
pthread_cond_broadcast(&update_cond);
4019
Prints a printf style message to the error log and, under NT, to the
4022
This function prints the message into a buffer and then sends that buffer
4023
to other functions to write that message to other logging sources.
4025
@param event_type Type of event to write (Error, Warning, or Info)
4026
@param format Printf style format of message
4027
@param args va_list list of arguments for the message
4030
The function always returns 0. The return value is present in the
4031
signature to be compatible with other logging routines, which could
4032
return an error (e.g. logging to the log tables)
4034
static void print_buffer_to_file(enum loglevel level, int error_code,
4035
const char *buffer, size_t buffer_length)
4040
DBUG_ENTER("print_buffer_to_file");
4041
DBUG_PRINT("enter",("buffer: %s", buffer));
4043
VOID(pthread_mutex_lock(&LOCK_error_log));
4046
localtime_r(&skr, &tm_tmp);
4049
fprintf(stderr, "%02d%02d%02d %2d:%02d:%02d [%s] %s\n",
4050
start->tm_year % 100,
4056
(level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
4057
"Warning" : "Note"),
4062
VOID(pthread_mutex_unlock(&LOCK_error_log));
4067
int vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
4071
int error_code= errno;
4072
DBUG_ENTER("vprint_msg_to_log");
4074
length= my_vsnprintf(buff, sizeof(buff), format, args);
4076
print_buffer_to_file(level, error_code, buff, length);
4082
void sql_print_error(const char *format, ...)
4085
DBUG_ENTER("sql_print_error");
4087
va_start(args, format);
4088
error_log_print(ERROR_LEVEL, format, args);
4095
void sql_print_warning(const char *format, ...)
4098
DBUG_ENTER("sql_print_warning");
4100
va_start(args, format);
4101
error_log_print(WARNING_LEVEL, format, args);
4108
void sql_print_information(const char *format, ...)
4111
DBUG_ENTER("sql_print_information");
4113
va_start(args, format);
4114
error_log_print(INFORMATION_LEVEL, format, args);
4121
/********* transaction coordinator log for 2pc - mmap() based solution *******/
4124
the log consists of a file, mmapped to a memory.
4125
file is divided into pages of tc_log_page_size size.
4126
(usable size of the first page is smaller because of log header)
4127
there's PAGE control structure for each page
4128
each page (or rather PAGE control structure) can be in one of three
4129
states - active, syncing, pool.
4130
there can be only one page in each of the active and syncing states,
4131
but many in pool - pool is fifo queue.
4132
usual lifecycle of a page is pool->active->syncing->pool
4133
"active" page - is a page where new xid's are logged.
4134
the page stays active as long as the syncing slot is taken.
4135
"syncing" page is being synced to disk. no new xid can be added to it.
4136
when the sync is done the page is moved to a pool and an active page
4139
the result of such an architecture is a natural "commit grouping" -
4140
If commits are coming faster than the system can sync, they do not
4141
stall. Instead, all commits that came since the last sync are
4142
logged to the same page, and they all are synced with the next -
4143
one - sync. Thus, though individual commits are delayed, throughput
4146
when a xid is added to an active page, the thread of this xid waits
4147
for a page's condition until the page is synced. when syncing slot
4148
becomes vacant one of these waiters is awakened to take care of syncing.
4149
it syncs the page and signals all waiters that the page is synced.
4150
PAGE::waiters is used to count these waiters, and a page may never
4151
become active again until waiters==0 (that is all waiters from the
4152
previous sync have noticed the sync was completed)
4154
note, that the page becomes "dirty" and has to be synced only when a
4155
new xid is added into it. Removing a xid from a page does not make it
4156
dirty - we don't sync removals to disk.
4159
ulong tc_log_page_waits= 0;
4163
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)
4165
static const char tc_log_magic[]={(char) 254, 0x23, 0x05, 0x74};
4167
ulong opt_tc_log_size= TC_LOG_MIN_SIZE;
4168
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
4170
int TC_LOG_MMAP::open(const char *opt_name)
4176
DBUG_ASSERT(total_ha_2pc > 1);
4177
DBUG_ASSERT(opt_name && opt_name[0]);
4179
tc_log_page_size= my_getpagesize();
4180
DBUG_ASSERT(TC_LOG_PAGE_SIZE % tc_log_page_size == 0);
4182
fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
4183
if ((fd= my_open(logname, O_RDWR, MYF(0))) < 0)
4185
if (my_errno != ENOENT)
4187
if (using_heuristic_recover())
4189
if ((fd= my_create(logname, CREATE_MODE, O_RDWR, MYF(MY_WME))) < 0)
4192
file_length= opt_tc_log_size;
4193
if (my_chsize(fd, file_length, 0, MYF(MY_WME)))
4200
sql_print_information("Recovering after a crash using %s", opt_name);
4201
if (tc_heuristic_recover)
4203
sql_print_error("Cannot perform automatic crash recovery when "
4204
"--tc-heuristic-recover is used");
4207
file_length= my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
4208
if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
4212
data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
4213
MAP_NOSYNC|MAP_SHARED, fd, 0);
4214
if (data == MAP_FAILED)
4221
npages=(uint)file_length/tc_log_page_size;
4222
DBUG_ASSERT(npages >= 3); // to guarantee non-empty pool
4223
if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
4226
for (pg=pages, i=0; i < npages; i++, pg++)
4231
pthread_mutex_init(&pg->lock, MY_MUTEX_INIT_FAST);
4232
pthread_cond_init (&pg->cond, 0);
4233
pg->start=(my_xid *)(data + i*tc_log_page_size);
4235
pg->end=(my_xid *)(pg->start + tc_log_page_size);
4236
pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
4238
pages[0].size=pages[0].free=
4239
(tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
4240
pages[0].start=pages[0].end-pages[0].size;
4241
pages[npages-1].next=0;
4244
if (crashed && recover())
4247
memcpy(data, tc_log_magic, sizeof(tc_log_magic));
4248
data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
4249
my_msync(fd, data, tc_log_page_size, MS_SYNC);
4252
pthread_mutex_init(&LOCK_sync, MY_MUTEX_INIT_FAST);
4253
pthread_mutex_init(&LOCK_active, MY_MUTEX_INIT_FAST);
4254
pthread_mutex_init(&LOCK_pool, MY_MUTEX_INIT_FAST);
4255
pthread_cond_init(&COND_active, 0);
4256
pthread_cond_init(&COND_pool, 0);
4263
pool_last=pages+npages-1;
4273
there is no active page, let's get one from the pool.
4275
Two strategies here:
4276
-# take the first from the pool
4277
-# if there're waiters - take the one with the most free space.
4280
TODO page merging. try to allocate adjacent page first,
4281
so that they can be flushed both in one sync
4284
void TC_LOG_MMAP::get_active_from_pool()
4286
PAGE **p, **best_p=0;
4290
pthread_mutex_lock(&LOCK_pool);
4295
if ((*p)->waiters == 0) // can the first page be used ?
4296
break; // yes - take it.
4298
best_free=0; // no - trying second strategy
4299
for (p=&(*p)->next; *p; p=&(*p)->next)
4301
if ((*p)->waiters == 0 && (*p)->free > best_free)
4303
best_free=(*p)->free;
4308
while ((*best_p == 0 || best_free == 0) && overflow());
4311
if (active->free == active->size) // we've chosen an empty page
4313
tc_log_cur_pages_used++;
4314
set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
4317
if ((*best_p)->next) // unlink the page from the pool
4318
*best_p=(*best_p)->next;
4323
pthread_mutex_unlock(&LOCK_pool);
4328
perhaps, increase log size ?
4330
int TC_LOG_MMAP::overflow()
4333
simple overflow handling - just wait
4334
TODO perhaps, increase log size ?
4335
let's check the behaviour of tc_log_page_waits first
4337
tc_log_page_waits++;
4338
pthread_cond_wait(&COND_pool, &LOCK_pool);
4339
return 1; // always return 1
4343
Record that transaction XID is committed on the persistent storage.
4345
This function is called in the middle of two-phase commit:
4346
First all resources prepare the transaction, then tc_log->log() is called,
4347
then all resources commit the transaction, then tc_log->unlog() is called.
4349
All access to active page is serialized but it's not a problem, as
4350
we're assuming that fsync() will be a main bottleneck.
4351
That is, by parallelizing writes to log pages we'll decrease the number of
4352
threads waiting for a page, but then all these threads will be waiting
4353
for a fsync() anyway
4355
If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and
4356
records XID in a special Xid_log_event.
4357
If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped
4363
\# - otherwise, "cookie", a number that will be passed as an argument
4364
to unlog() call. tc_log can define it any way it wants,
4365
and use for whatever purposes. TC_LOG_MMAP sets it
4366
to the position in memory where xid was logged to.
4369
int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid)
4375
pthread_mutex_lock(&LOCK_active);
4378
if active page is full - just wait...
4379
frankly speaking, active->free is accessed here outside of mutex
4380
protection, but it's safe, because it only means we may miss an
4381
unlog() for the active page, and we're not waiting for it here -
4382
unlog() does not signal COND_active.
4384
while (unlikely(active && active->free == 0))
4385
pthread_cond_wait(&COND_active, &LOCK_active);
4387
/* no active page ? take one from the pool */
4389
get_active_from_pool();
4392
pthread_mutex_lock(&p->lock);
4394
/* searching for an empty slot */
4398
DBUG_ASSERT(p->ptr < p->end); // because p->free > 0
4401
/* found! store xid there and mark the page dirty */
4402
cookie= (ulong)((uchar *)p->ptr - data); // can never be zero
4407
/* to sync or not to sync - this is the question */
4408
pthread_mutex_unlock(&LOCK_active);
4409
pthread_mutex_lock(&LOCK_sync);
4410
pthread_mutex_unlock(&p->lock);
4413
{ // somebody's syncing. let's wait
4416
note - it must be while (), not do ... while () here
4417
as p->state may not be DIRTY when we come here
4419
while (p->state == DIRTY && syncing)
4420
pthread_cond_wait(&p->cond, &LOCK_sync);
4422
err= p->state == ERROR;
4423
if (p->state != DIRTY) // page was synced
4425
if (p->waiters == 0)
4426
pthread_cond_signal(&COND_pool); // in case somebody's waiting
4427
pthread_mutex_unlock(&LOCK_sync);
4428
goto done; // we're done
4430
} // page was not synced! do it now
4431
DBUG_ASSERT(active == p && syncing == 0);
4432
pthread_mutex_lock(&LOCK_active);
4433
syncing=p; // place is vacant - take it
4434
active=0; // page is not active anymore
4435
pthread_cond_broadcast(&COND_active); // in case somebody's waiting
4436
pthread_mutex_unlock(&LOCK_active);
4437
pthread_mutex_unlock(&LOCK_sync);
4441
return err ? 0 : cookie;
4444
int TC_LOG_MMAP::sync()
4448
DBUG_ASSERT(syncing != active);
4451
sit down and relax - this can take a while...
4452
note - no locks are held at this point
4454
err= my_msync(fd, syncing->start, 1, MS_SYNC);
4456
/* page is synced. let's move it to the pool */
4457
pthread_mutex_lock(&LOCK_pool);
4458
pool_last->next=syncing;
4461
syncing->state= err ? ERROR : POOL;
4462
pthread_cond_broadcast(&syncing->cond); // signal "sync done"
4463
pthread_cond_signal(&COND_pool); // in case somebody's waiting
4464
pthread_mutex_unlock(&LOCK_pool);
4466
/* marking 'syncing' slot free */
4467
pthread_mutex_lock(&LOCK_sync);
4469
pthread_cond_signal(&active->cond); // wake up a new syncer
4470
pthread_mutex_unlock(&LOCK_sync);
4475
erase xid from the page, update page free space counters/pointers.
4476
cookie points directly to the memory where xid was logged.
4479
void TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
4481
PAGE *p=pages+(cookie/tc_log_page_size);
4482
my_xid *x=(my_xid *)(data+cookie);
4484
DBUG_ASSERT(*x == xid);
4485
DBUG_ASSERT(x >= p->start && x < p->end);
4488
pthread_mutex_lock(&p->lock);
4490
DBUG_ASSERT(p->free <= p->size);
4491
set_if_smaller(p->ptr, x);
4492
if (p->free == p->size) // the page is completely empty
4493
statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
4494
if (p->waiters == 0) // the page is in pool and ready to rock
4495
pthread_cond_signal(&COND_pool); // ping ... for overflow()
4496
pthread_mutex_unlock(&p->lock);
4499
void TC_LOG_MMAP::close()
4504
pthread_mutex_destroy(&LOCK_sync);
4505
pthread_mutex_destroy(&LOCK_active);
4506
pthread_mutex_destroy(&LOCK_pool);
4507
pthread_cond_destroy(&COND_pool);
4509
data[0]='A'; // garble the first (signature) byte, in case my_delete fails
4511
for (i=0; i < npages; i++)
4513
if (pages[i].ptr == 0)
4515
pthread_mutex_destroy(&pages[i].lock);
4516
pthread_cond_destroy(&pages[i].cond);
4519
my_free((uchar*)pages, MYF(0));
4521
my_munmap((char*)data, (size_t)file_length);
4523
my_close(fd, MYF(0));
4525
if (inited>=5) // cannot do in the switch because of Windows
4526
my_delete(logname, MYF(MY_WME));
4530
int TC_LOG_MMAP::recover()
4533
PAGE *p=pages, *end_p=pages+npages;
4535
if (memcmp(data, tc_log_magic, sizeof(tc_log_magic)))
4537
sql_print_error("Bad magic header in tc log");
4542
the first byte after magic signature is set to current
4543
number of storage engines on startup
4545
if (data[sizeof(tc_log_magic)] != total_ha_2pc)
4547
sql_print_error("Recovery failed! You must enable "
4548
"exactly %d storage engines that support "
4549
"two-phase commit protocol",
4550
data[sizeof(tc_log_magic)]);
4554
if (hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
4555
sizeof(my_xid), 0, 0, MYF(0)))
4558
for ( ; p < end_p ; p++)
4560
for (my_xid *x=p->start; x < p->end; x++)
4561
if (*x && my_hash_insert(&xids, (uchar *)x))
4565
if (ha_recover(&xids))
4569
bzero(data, (size_t)file_length);
4575
sql_print_error("Crash recovery failed. Either correct the problem "
4576
"(if it's, for example, out of memory error) and restart, "
4577
"or delete tc log and start mysqld with "
4578
"--tc-heuristic-recover={commit|rollback}");
4584
TC_LOG_DUMMY tc_log_dummy;
4585
TC_LOG_MMAP tc_log_mmap;
4588
Perform heuristic recovery, if --tc-heuristic-recover was used.
4591
no matter whether heuristic recovery was successful or not
4592
mysqld must exit. So, return value is the same in both cases.
4595
0 no heuristic recovery was requested
4597
1 heuristic recovery was performed
4600
int TC_LOG::using_heuristic_recover()
4602
if (!tc_heuristic_recover)
4605
sql_print_information("Heuristic crash recovery mode");
4607
sql_print_error("Heuristic crash recovery failed");
4608
sql_print_information("Please restart mysqld without --tc-heuristic-recover");
4612
/****** transaction coordinator log for 2pc - binlog() based solution ******/
4613
#define TC_LOG_BINLOG MYSQL_BIN_LOG
4617
keep in-memory list of prepared transactions
4618
(add to list in log(), remove on unlog())
4619
and copy it to the new binlog if rotated
4620
but let's check the behaviour of tc_log_page_waits first!
4623
int TC_LOG_BINLOG::open(const char *opt_name)
4628
DBUG_ASSERT(total_ha_2pc > 1);
4629
DBUG_ASSERT(opt_name && opt_name[0]);
4631
pthread_mutex_init(&LOCK_prep_xids, MY_MUTEX_INIT_FAST);
4632
pthread_cond_init (&COND_prep_xids, 0);
4634
if (!my_b_inited(&index_file))
4636
/* There was a failure to open the index file, can't open the binlog */
4641
if (using_heuristic_recover())
4643
/* generate a new binlog to mask a corrupted one */
4644
open(opt_name, LOG_BIN, 0, WRITE_CACHE, 0, max_binlog_size, 0);
4649
if ((error= find_log_pos(&log_info, NullS, 1)))
4651
if (error != LOG_INFO_EOF)
4652
sql_print_error("find_log_pos() failed (error: %d)", error);
4663
Format_description_log_event fdle(BINLOG_VERSION);
4664
char log_name[FN_REFLEN];
4666
if (! fdle.is_valid())
4671
strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
4672
} while (!(error= find_next_log(&log_info, 1)));
4674
if (error != LOG_INFO_EOF)
4676
sql_print_error("find_log_pos() failed (error: %d)", error);
4680
if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
4682
sql_print_error("%s", errmsg);
4686
if ((ev= Log_event::read_log_event(&log, 0, &fdle)) &&
4687
ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
4688
ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
4690
sql_print_information("Recovering after a crash using %s", opt_name);
4691
error= recover(&log, (Format_description_log_event *)ev);
4698
my_close(file, MYF(MY_WME));
4708
/** This is called on shutdown, after ha_panic. */
4709
void TC_LOG_BINLOG::close()
4711
DBUG_ASSERT(prepared_xids==0);
4712
pthread_mutex_destroy(&LOCK_prep_xids);
4713
pthread_cond_destroy (&COND_prep_xids);
4725
int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
4727
DBUG_ENTER("TC_LOG_BINLOG::log");
4728
Xid_log_event xle(thd, xid);
4729
binlog_trx_data *trx_data=
4730
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
4732
We always commit the entire transaction when writing an XID. Also
4733
note that the return value is inverted.
4735
DBUG_RETURN(!binlog_end_trans(thd, trx_data, &xle, TRUE));
4738
void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
4740
pthread_mutex_lock(&LOCK_prep_xids);
4741
DBUG_ASSERT(prepared_xids > 0);
4742
if (--prepared_xids == 0) {
4743
DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids));
4744
pthread_cond_signal(&COND_prep_xids);
4746
pthread_mutex_unlock(&LOCK_prep_xids);
4747
rotate_and_purge(0); // as ::write() did not rotate
4750
int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
4756
if (! fdle->is_valid() ||
4757
hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
4758
sizeof(my_xid), 0, 0, MYF(0)))
4761
init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE);
4763
fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
4765
while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid())
4767
if (ev->get_type_code() == XID_EVENT)
4769
Xid_log_event *xev=(Xid_log_event *)ev;
4770
uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
4774
my_hash_insert(&xids, x);
4779
if (ha_recover(&xids))
4782
free_root(&mem_root, MYF(0));
4787
free_root(&mem_root, MYF(0));
4790
sql_print_error("Crash recovery failed. Either correct the problem "
4791
"(if it's, for example, out of memory error) and restart, "
4792
"or delete (or rename) binary log and start mysqld with "
4793
"--tc-heuristic-recover={commit|rollback}");
4798
#ifdef INNODB_COMPATIBILITY_HOOKS
4800
Get the file name of the MySQL binlog.
4801
@return the name of the binlog file
4804
const char* mysql_bin_log_file_name(void)
4806
return mysql_bin_log.get_log_fname();
4809
Get the current position of the MySQL binlog.
4810
@return byte offset from the beginning of the binlog
4813
ulonglong mysql_bin_log_file_pos(void)
4815
return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
4817
#endif /* INNODB_COMPATIBILITY_HOOKS */
4820
struct st_mysql_storage_engine binlog_storage_engine=
4821
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
4823
mysql_declare_plugin(binlog)
4825
MYSQL_STORAGE_ENGINE_PLUGIN,
4826
&binlog_storage_engine,
4829
"This is a pseudo storage engine to represent the binlog in a transaction",
4831
binlog_init, /* Plugin Init */
4832
NULL, /* Plugin Deinit */
4834
NULL, /* status variables */
4835
NULL, /* system variables */
4836
NULL /* config options */
4838
mysql_declare_plugin_end;