1
/* Copyright (C) 2000-2003 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
24
Abort logging when we get an error in reading or writing log files
27
#include "mysql_priv.h"
29
#include "rpl_filter.h"
32
#include <mysys/my_dir.h>
35
#include <drizzled/plugin.h>
36
#include <drizzled/drizzled_error_messages.h>
38
/* max size of the log message */
39
#define MAX_LOG_BUFFER_SIZE 1024
40
#define MAX_USER_HOST_SIZE 512
41
#define MAX_TIME_SIZE 32
42
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
44
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
48
MYSQL_BIN_LOG mysql_bin_log;
49
ulong sync_binlog_counter= 0;
51
static bool test_if_number(const char *str,
52
long *res, bool allow_wildcards);
53
static int binlog_init(void *p);
54
static int binlog_close_connection(handlerton *hton, THD *thd);
55
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
56
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
57
static int binlog_commit(handlerton *hton, THD *thd, bool all);
58
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
59
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
62
sql_print_message_func sql_print_message_handlers[3] =
64
sql_print_information,
70
/*
  Build a default log file name in 'buff'.

  pidfile_name is first copied into buff with strmake(), bounded by
  FN_REFLEN-5. fn_format() is then applied to buff in place, with
  mysql_data_home as the directory argument, log_ext as the extension and
  the MY_UNPACK_FILENAME and MY_REPLACE_EXT flags.

  Returns the value returned by fn_format().
*/
char *make_default_log_name(char *buff,const char* log_ext)
72
strmake(buff, pidfile_name, FN_REFLEN-5);
73
return fn_format(buff, buff, mysql_data_home, log_ext,
74
MYF(MY_UNPACK_FILENAME|MY_REPLACE_EXT));
78
Helper class to hold a mutex for the duration of the
81
Eliminates the need for explicit unlocking of mutexes on, e.g.,
82
error returns. On passing a null pointer, the sentry will not do
88
Mutex_sentry(pthread_mutex_t *mutex)
92
pthread_mutex_lock(mutex);
98
pthread_mutex_unlock(m_mutex);
103
pthread_mutex_t *m_mutex;
105
// It's not allowed to copy this object in any way
106
Mutex_sentry(Mutex_sentry const&);
107
void operator=(Mutex_sentry const&);
111
Helper class to store binary log transaction data.
113
class binlog_trx_data {
116
: at_least_one_stmt(0), m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF)
118
trans_log.end_of_file= max_binlog_cache_size;
123
assert(pending() == NULL);
124
close_cached_file(&trans_log);
127
my_off_t position() const {
128
return my_b_tell(&trans_log);
133
return pending() == NULL && my_b_tell(&trans_log) == 0;
137
Truncate the transaction cache to a certain position. This
138
includes deleting the pending event.
140
void truncate(my_off_t pos)
144
reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0);
145
if (pos < before_stmt_pos)
146
before_stmt_pos= MY_OFF_T_UNDEF;
149
The only valid positions that can be truncated to are at the
150
beginning of a statement. We are relying on this fact to be able
151
to set the at_least_one_stmt flag correctly. In other words, if
152
we are truncating to the beginning of the transaction cache,
153
there will be no statements in the cache, otherwise, we will
154
have at least one statement in the transaction cache.
156
at_least_one_stmt= (pos > 0);
160
Reset the entire contents of the transaction cache, emptying it
166
before_stmt_pos= MY_OFF_T_UNDEF;
167
trans_log.end_of_file= max_binlog_cache_size;
170
Rows_log_event *pending() const
175
void set_pending(Rows_log_event *const pending)
180
IO_CACHE trans_log; // The transaction cache
183
Boolean that is true if there is at least one statement in the
186
bool at_least_one_stmt;
190
Pending binrows event. This event is the event where the rows are
193
Rows_log_event *m_pending;
197
Binlog position before the start of the current statement.
199
my_off_t before_stmt_pos;
202
handlerton *binlog_hton;
205
/* Check if a given table is opened log table */
206
int check_if_log_table(uint db_len __attribute__((unused)),
207
const char *db __attribute__((unused)),
208
uint table_name_len __attribute__((unused)),
209
const char *table_name __attribute__((unused)),
210
uint check_if_opened __attribute__((unused)))
215
/* log event handlers */
217
bool Log_to_file_event_handler::
218
log_error(enum loglevel level, const char *format,
221
return vprint_msg_to_log(level, format, args);
224
/*
  Initialize the pthread objects of the two file-based query logs owned by
  this handler: mysql_log (used by log_general) and mysql_slow_log (used
  by log_slow).
*/
void Log_to_file_event_handler::init_pthread_objects()
226
mysql_log.init_pthread_objects();
227
mysql_slow_log.init_pthread_objects();
231
/**
  Wrapper around MYSQL_LOG::write() for slow log.

  Every argument is passed on, unchanged and in the same order, to
  mysql_slow_log.write(); the result of that call is returned.
*/
233
bool Log_to_file_event_handler::
234
log_slow(THD *thd, time_t current_time, time_t query_start_arg,
235
const char *user_host, uint user_host_len,
236
uint64_t query_utime, uint64_t lock_utime, bool is_command,
237
const char *sql_text, uint sql_text_len)
239
return mysql_slow_log.write(thd, current_time, query_start_arg,
240
user_host, user_host_len,
241
query_utime, lock_utime, is_command,
242
sql_text, sql_text_len);
247
Wrapper around MYSQL_LOG::write() for general log. We need it since we
248
want all log event handlers to have the same signature.
251
/*
  General-log entry point of the file handler.

  thd and client_cs are marked unused. All other arguments are forwarded,
  in order, to mysql_log.write(), whose result is returned.
*/
bool Log_to_file_event_handler::
252
log_general(THD *thd __attribute__((unused)),
253
time_t event_time, const char *user_host,
254
uint user_host_len, int thread_id,
255
const char *command_type, uint command_type_len,
256
const char *sql_text, uint sql_text_len,
257
CHARSET_INFO *client_cs __attribute__((unused)))
259
return mysql_log.write(event_time, user_host, user_host_len,
260
thread_id, command_type, command_type_len,
261
sql_text, sql_text_len);
265
bool Log_to_file_event_handler::init()
270
mysql_slow_log.open_slow_log(sys_var_slow_log_path.value);
273
mysql_log.open_query_log(sys_var_general_log_path.value);
275
is_initialized= true;
282
void Log_to_file_event_handler::cleanup()
285
mysql_slow_log.cleanup();
288
/*
  Flush the file-based logs by calling reopen_file() on the general log
  (mysql_log) and then on the slow log (mysql_slow_log).
*/
void Log_to_file_event_handler::flush()
290
/* reopen log files */
292
mysql_log.reopen_file();
294
mysql_slow_log.reopen_file();
298
Log error with all enabled log event handlers
303
level The level of the error significance: NOTE,
305
format format string for the error message
306
args list of arguments for the format string
313
bool LOGGER::error_log_print(enum loglevel level, const char *format,
317
Log_event_handler **current_handler;
319
/* currently we don't need locking here as there is no error_log table */
320
for (current_handler= error_log_handler_list ; *current_handler ;)
321
error= (*current_handler++)->log_error(level, format, args) || error;
327
/*
  First cleanup stage: destroy LOCK_logger and, when a file log handler
  exists, call its cleanup(). The handler object itself is not deleted
  here.
*/
void LOGGER::cleanup_base()
330
rwlock_destroy(&LOCK_logger);
331
if (file_log_handler)
332
file_log_handler->cleanup();
336
/*
  Final cleanup stage: delete the file log handler object if one exists.
*/
void LOGGER::cleanup_end()
339
if (file_log_handler)
340
delete file_log_handler;
345
Perform basic log initialization: create file-based log handler and
348
void LOGGER::init_base()
354
Here we create file log handler. We don't do it for the table log handler
355
here as it cannot be created so early. The reason is THD initialization,
356
which depends on the system variables (parsed later).
358
if (!file_log_handler)
359
file_log_handler= new Log_to_file_event_handler;
361
/* by default we use traditional error log */
362
init_error_log(LOG_FILE);
364
file_log_handler->init_pthread_objects();
365
my_rwlock_init(&LOCK_logger, NULL);
369
bool LOGGER::flush_logs(THD *thd __attribute__((unused)))
374
Now we lock logger, as nobody should be able to use logging routines while
375
log tables are closed
377
logger.lock_exclusive();
379
/* reopen log files */
380
file_log_handler->flush();
382
/* end of log flush */
389
Log slow query with all enabled log event handlers
394
thd THD of the query being logged
395
query The query being logged
396
query_length The length of the query string
397
current_utime Current time in microseconds (from undefined start)
404
bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
405
uint64_t current_utime)
409
Log_event_handler **current_handler;
410
bool is_command= false;
411
char user_host_buff[MAX_USER_HOST_SIZE];
412
Security_context *sctx= thd->security_ctx;
413
uint user_host_len= 0;
414
uint64_t query_utime, lock_utime;
417
Print the message to the buffer if we have slow log enabled
420
if (*slow_log_handler_list)
424
/* do not log slow queries from replication threads */
425
if (thd->slave_thread && !opt_log_slow_slave_statements)
435
/* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
436
user_host_len= (strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
437
sctx->priv_user ? sctx->priv_user : "", "[",
438
sctx->user ? sctx->user : "", "] @ ",
439
sctx->host ? sctx->host : "", " [",
440
sctx->ip ? sctx->ip : "", "]", NullS) -
443
current_time= my_time_possible_from_micro(current_utime);
444
if (thd->start_utime)
446
query_utime= (current_utime - thd->start_utime);
447
lock_utime= (thd->utime_after_lock - thd->start_utime);
451
query_utime= lock_utime= 0;
457
query= command_name[thd->command].str;
458
query_length= command_name[thd->command].length;
461
for (current_handler= slow_log_handler_list; *current_handler ;)
462
error= (*current_handler++)->log_slow(thd, current_time, thd->start_time,
463
user_host_buff, user_host_len,
464
query_utime, lock_utime, is_command,
465
query, query_length) || error;
472
bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
473
const char *query, uint query_length)
476
Log_event_handler **current_handler= general_log_handler_list;
477
char user_host_buff[MAX_USER_HOST_SIZE];
478
Security_context *sctx= thd->security_ctx;
480
uint user_host_len= 0;
484
id= thd->thread_id; /* Normal thread */
486
id= 0; /* Log from connect handler */
494
user_host_len= strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
495
sctx->priv_user ? sctx->priv_user : "", "[",
496
sctx->user ? sctx->user : "", "] @ ",
497
sctx->host ? sctx->host : "", " [",
498
sctx->ip ? sctx->ip : "", "]", NullS) -
501
current_time= my_time(0);
503
while (*current_handler)
504
error|= (*current_handler++)->
505
log_general(thd, current_time, user_host_buff,
507
command_name[(uint) command].str,
508
command_name[(uint) command].length,
510
thd->variables.character_set_client) || error;
516
/*
  Format a general-log message into a local buffer of MAX_LOG_BUFFER_SIZE
  bytes and hand it to general_log_write().

  The message is produced with vsnprintf() into message_buff; the visible
  alternative path stores an empty string instead. The result of
  general_log_write() is returned.
*/
bool LOGGER::general_log_print(THD *thd, enum enum_server_command command,
517
const char *format, va_list args)
519
uint message_buff_len= 0;
520
char message_buff[MAX_LOG_BUFFER_SIZE];
522
/* prepare message */
524
message_buff_len= vsnprintf(message_buff, sizeof(message_buff),
527
message_buff[0]= '\0';
/*
  NOTE(review): vsnprintf() returns the length the complete output would
  have had, which can be >= sizeof(message_buff) when the message is
  truncated. Confirm that message_buff_len is never used as a byte count
  larger than the buffer by general_log_write().
*/
529
return general_log_write(thd, command, message_buff, message_buff_len);
532
/*
  Select the handlers used for the error log.

  If error_log_printer has the LOG_NONE bit set, the handler list is
  emptied (first slot set to 0). In the visible switch branch the list is
  set to the file log handler followed by a 0 terminator.
  NOTE(review): the case labels and the control flow after the LOG_NONE
  branch are not visible in this view.
*/
void LOGGER::init_error_log(uint error_log_printer)
534
if (error_log_printer & LOG_NONE)
536
error_log_handler_list[0]= 0;
540
switch (error_log_printer) {
542
error_log_handler_list[0]= file_log_handler;
543
error_log_handler_list[1]= 0;
548
/*
  Select the handlers used for the slow log.

  If slow_log_printer has the LOG_NONE bit set, the handler list is
  emptied (first slot set to 0). The remaining visible statements install
  the file log handler as the only entry, followed by a 0 terminator.
  NOTE(review): the control flow between the two cases is not visible in
  this view — presumably the LOG_NONE branch returns early; confirm.
*/
void LOGGER::init_slow_log(uint slow_log_printer)
550
if (slow_log_printer & LOG_NONE)
552
slow_log_handler_list[0]= 0;
556
slow_log_handler_list[0]= file_log_handler;
557
slow_log_handler_list[1]= 0;
560
/*
  Select the handlers used for the general log.

  If general_log_printer has the LOG_NONE bit set, the handler list is
  emptied (first slot set to 0). The remaining visible statements install
  the file log handler as the only entry, followed by a 0 terminator.
  NOTE(review): the control flow between the two cases is not visible in
  this view — presumably the LOG_NONE branch returns early; confirm.
*/
void LOGGER::init_general_log(uint general_log_printer)
562
if (general_log_printer & LOG_NONE)
564
general_log_handler_list[0]= 0;
568
general_log_handler_list[0]= file_log_handler;
569
general_log_handler_list[1]= 0;
573
bool LOGGER::activate_log_handler(THD* thd __attribute__((unused)),
576
MYSQL_QUERY_LOG *file_log;
583
file_log= file_log_handler->get_mysql_slow_log();
585
file_log->open_slow_log(sys_var_slow_log_path.value);
586
init_slow_log(log_output_options);
590
case QUERY_LOG_GENERAL:
593
file_log= file_log_handler->get_mysql_log();
595
file_log->open_query_log(sys_var_general_log_path.value);
596
init_general_log(log_output_options);
608
void LOGGER::deactivate_log_handler(THD *thd __attribute__((unused)),
616
tmp_opt= &opt_slow_log;
617
file_log= file_log_handler->get_mysql_slow_log();
619
case QUERY_LOG_GENERAL:
621
file_log= file_log_handler->get_mysql_log();
624
assert(0); // Impossible
636
/*
  Configure the handler lists of all three logs.

  Calls init_error_log(), init_slow_log() and init_general_log(), in that
  order, each with its corresponding printer bitmask argument.
*/
int LOGGER::set_handlers(uint error_log_printer,
637
uint slow_log_printer,
638
uint general_log_printer)
640
/* error log table is not supported yet */
643
init_error_log(error_log_printer);
644
init_slow_log(slow_log_printer);
645
init_general_log(general_log_printer);
654
Save position of binary log transaction cache.
657
binlog_trans_log_savepos()
659
thd The thread to take the binlog data from
660
pos Pointer to variable where the position will be stored
664
Save the current position in the binary log transaction cache into
665
the variable pointed to by 'pos'
669
binlog_trans_log_savepos(THD *thd, my_off_t *pos)
672
if (thd_get_ha_data(thd, binlog_hton) == NULL)
673
thd->binlog_setup_trx_data();
674
binlog_trx_data *const trx_data=
675
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
676
assert(mysql_bin_log.is_open());
677
*pos= trx_data->position();
683
Truncate the binary log transaction cache.
686
binlog_trans_log_truncate()
688
thd The thread to take the binlog data from
689
pos Position to truncate to
693
Truncate the binary log to the given position. Will not change
698
binlog_trans_log_truncate(THD *thd, my_off_t pos)
700
assert(thd_get_ha_data(thd, binlog_hton) != NULL);
701
/* Only true if binlog_trans_log_savepos() wasn't called before */
702
assert(pos != ~(my_off_t) 0);
704
binlog_trx_data *const trx_data=
705
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
706
trx_data->truncate(pos);
712
this function is mostly a placeholder.
713
conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
714
should be moved here.
717
/*
  Set up the binlog handlerton.

  'p' is stored in the global binlog_hton (cast to handlerton*) and its
  fields are filled in:
    - state follows opt_bin_log (SHOW_OPTION_YES / SHOW_OPTION_NO);
    - db_type is DB_TYPE_BINLOG;
    - savepoint_offset is sizeof(my_off_t): the savepoint callbacks in
      this file treat their 'sv' argument as a my_off_t position;
    - the close_connection, savepoint, commit, rollback and prepare
      callbacks point at the static binlog_* functions of this file;
    - flags are HTON_NOT_USER_SELECTABLE | HTON_HIDDEN.
*/
int binlog_init(void *p)
719
binlog_hton= (handlerton *)p;
720
binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
721
binlog_hton->db_type=DB_TYPE_BINLOG;
722
binlog_hton->savepoint_offset= sizeof(my_off_t);
723
binlog_hton->close_connection= binlog_close_connection;
724
binlog_hton->savepoint_set= binlog_savepoint_set;
725
binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
726
binlog_hton->commit= binlog_commit;
727
binlog_hton->rollback= binlog_rollback;
728
binlog_hton->prepare= binlog_prepare;
729
binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
734
static int binlog_close_connection(handlerton *hton __attribute__((unused)),
737
binlog_trx_data *const trx_data=
738
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
739
assert(trx_data->empty());
740
thd_set_ha_data(thd, binlog_hton, NULL);
741
trx_data->~binlog_trx_data();
742
my_free((uchar*)trx_data, MYF(0));
752
thd The thread whose transaction should be ended
753
trx_data Pointer to the transaction data to use
754
end_ev The end event to use, or NULL
755
all True if the entire transaction should be ended, false if
756
only the statement transaction should be ended.
760
End the currently open transaction. The transaction can be either
761
a real transaction (if 'all' is true) or a statement transaction
764
If 'end_ev' is NULL, the transaction is a rollback of only
765
transactional tables, so the transaction cache will be truncated
766
to either just before the last opened statement transaction (if
767
'all' is false), or reset completely (if 'all' is true).
770
binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
771
Log_event *end_ev, bool all)
774
IO_CACHE *trans_log= &trx_data->trans_log;
777
NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of
778
only transactional tables. If the transaction contain changes to
779
any non-transactional tables, we need to write the transaction and log
785
Doing a commit or a rollback including non-transactional tables,
786
i.e., ending a transaction where we might write the transaction
787
cache to the binary log.
789
We can always end the statement when ending a transaction since
790
transactions are not allowed inside stored functions. If they
791
were, we would have to ensure that we're not ending a statement
792
inside a stored function.
794
thd->binlog_flush_pending_rows_event(true);
796
error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev);
800
We need to step the table map version after writing the
801
transaction cache to disk.
803
mysql_bin_log.update_table_map_version();
804
statistic_increment(binlog_cache_use, &LOCK_status);
805
if (trans_log->disk_writes != 0)
807
statistic_increment(binlog_cache_disk_use, &LOCK_status);
808
trans_log->disk_writes= 0;
814
If rolling back an entire transaction or a single statement not
815
inside a transaction, we reset the transaction cache.
817
If rolling back a statement in a transaction, we truncate the
818
transaction cache to remove the statement.
820
if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
824
assert(!thd->binlog_get_pending_rows_event());
825
thd->clear_binlog_table_maps();
828
trx_data->truncate(trx_data->before_stmt_pos);
831
We need to step the table map version on a rollback to ensure
832
that a new table map event is generated instead of the one that
833
was written to the thrown-away transaction cache.
835
mysql_bin_log.update_table_map_version();
841
static int binlog_prepare(handlerton *hton __attribute__((unused)),
842
THD *thd __attribute__((unused)),
843
bool all __attribute__((unused)))
847
just pretend we can do 2pc, so that MySQL won't
849
real work will be done in MYSQL_BIN_LOG::log_xid()
854
#define YESNO(X) ((X) ? "yes" : "no")
857
This function is called once after each statement.
859
It has the responsibility to flush the transaction cache to the
860
binlog file on commits.
862
@param hton The binlog handlerton.
863
@param thd The client thread that executes the transaction.
864
@param all This is @c true if this is a real transaction commit, and
867
@see handlerton::commit
869
static int binlog_commit(handlerton *hton __attribute__((unused)),
872
binlog_trx_data *const trx_data=
873
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
875
if (trx_data->empty())
877
// we're here because trans_log was flushed in MYSQL_BIN_LOG::log_xid()
883
Decision table for committing a transaction. The top part, the
884
*conditions* represent different cases that can occur, and the
885
bottom part, the *actions*, represent what should be done in that
888
Real transaction 'all' was true
890
Statement in cache There were at least one statement in the
893
In transaction We are inside a transaction
895
Stmt modified non-trans The statement being committed modified a
896
non-transactional table
898
All modified non-trans Some statement before this one in the
899
transaction modified a non-transactional
903
============================= = = = = = = = = = = = = = = = =
904
Real transaction N N N N N N N N N N N N N N N N
905
Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y
906
In transaction N N N N Y Y Y Y N N N N Y Y Y Y
907
Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y
908
All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y
910
Action: (C)ommit/(A)ccumulate C C - C A C - C - - - - A A - A
911
============================= = = = = = = = = = = = = = = = =
914
============================= = = = = = = = = = = = = = = = =
915
Real transaction Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y
916
Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y
917
In transaction N N N N Y Y Y Y N N N N Y Y Y Y
918
Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y
919
All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y
921
(C)ommit/(A)ccumulate/(-) - - - - C C - C - - - - C C - C
922
============================= = = = = = = = = = = = = = = = =
924
In other words, we commit the transaction if and only if either of
925
the following are true:
926
- We are not in a transaction and committing a statement
928
- We are in a transaction and one (or more) of the following are
931
- A full transaction is committed
935
- A non-transactional statement is committed and there is
938
Otherwise, we accumulate the statement
940
uint64_t const in_transaction=
941
thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);
942
if ((in_transaction && (all || (!trx_data->at_least_one_stmt && thd->transaction.stmt.modified_non_trans_table))) || (!in_transaction && !all))
944
Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), true, false);
945
qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE)
946
int error= binlog_end_trans(thd, trx_data, &qev, all);
953
This function is called when a transaction involving a transactional
954
table is rolled back.
956
It has the responsibility to flush the transaction cache to the
957
binlog file. However, if the transaction does not involve
958
non-transactional tables, nothing needs to be logged.
960
@param hton The binlog handlerton.
961
@param thd The client thread that executes the transaction.
962
@param all This is @c true if this is a real transaction rollback, and
965
@see handlerton::rollback
967
static int binlog_rollback(handlerton *hton __attribute__((unused)),
971
binlog_trx_data *const trx_data=
972
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
974
if (trx_data->empty()) {
979
if ((all && thd->transaction.all.modified_non_trans_table) ||
980
(!all && thd->transaction.stmt.modified_non_trans_table) ||
981
(thd->options & OPTION_KEEP_LOG))
984
We write the transaction cache with a rollback last if we have
985
modified any non-transactional table. We do this even if we are
986
committing a single statement that has modified a
987
non-transactional table since it can have modified a
988
transactional table in that statement as well, which needs to be
989
rolled back on the slave.
991
Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), true, false);
992
qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE)
993
error= binlog_end_trans(thd, trx_data, &qev, all);
995
else if ((all && !thd->transaction.all.modified_non_trans_table) ||
996
(!all && !thd->transaction.stmt.modified_non_trans_table))
999
If we have modified only transactional tables, we can truncate
1000
the transaction cache without writing anything to the binary
1003
error= binlog_end_trans(thd, trx_data, 0, all);
1010
How do we handle this (unlikely but legal) case:
1012
[transaction] + [update to non-trans table] + [rollback to savepoint] ?
1014
The problem occurs when a savepoint is before the update to the
1015
non-transactional table. Then when there's a rollback to the savepoint, if we
1016
simply truncate the binlog cache, we lose the part of the binlog cache where
1017
the update is. If we want to not lose it, we need to write the SAVEPOINT
1018
command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
1019
is easy: it's just write at the end of the binlog cache, but the former
1020
should be *inserted* to the place where the user called SAVEPOINT. The
1021
solution is that when the user calls SAVEPOINT, we write it to the binlog
1022
cache (so no need to later insert it). As transactions are never intermixed
1023
in the binary log (i.e. they are serialized), we won't have conflicts with
1024
savepoint names when using mysqlbinlog or in the slave SQL thread.
1025
Then when ROLLBACK TO SAVEPOINT is called, if we updated some
1026
non-transactional table, we don't truncate the binlog cache but instead write
1027
ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
1028
will chop the SAVEPOINT command from the binlog cache, which is good as in
1029
that case there is no need to have it in the binlog).
1032
static int binlog_savepoint_set(handlerton *hton __attribute__((unused)),
1035
binlog_trans_log_savepos(thd, (my_off_t*) sv);
1036
/* Write it to the binary log */
1039
thd->binlog_query(THD::STMT_QUERY_TYPE,
1040
thd->query, thd->query_length, true, false);
1044
static int binlog_savepoint_rollback(handlerton *hton __attribute__((unused)),
1048
Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
1049
non-transactional table. Otherwise, truncate the binlog cache starting
1050
from the SAVEPOINT command.
1052
if (unlikely(thd->transaction.all.modified_non_trans_table ||
1053
(thd->options & OPTION_KEEP_LOG)))
1056
thd->binlog_query(THD::STMT_QUERY_TYPE,
1057
thd->query, thd->query_length, true, false);
1060
binlog_trans_log_truncate(thd, *(my_off_t*)sv);
1065
/*
  Verify that 'log' starts with the binary log magic number.

  The cache must be positioned at offset 0 (asserted). sizeof(magic) bytes
  are read and compared with BINLOG_MAGIC. On a read failure or a
  mismatch, *errmsg is set to a static message; the read failure is also
  reported through sql_print_error().
  NOTE(review): the return statements are not visible in this view.
*/
int check_binlog_magic(IO_CACHE* log, const char** errmsg)
1068
assert(my_b_tell(log) == 0);
1070
if (my_b_read(log, (uchar*) magic, sizeof(magic)))
1072
*errmsg = "I/O error reading the header from the binary log";
1073
sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
1077
if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
1079
*errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
1086
/*
  Open a binary log file for reading and validate its header.

  Steps visible here:
    1. my_open() the file with O_RDONLY | O_BINARY | O_SHARE; on failure
       log the file name and my_errno and set *errmsg.
    2. init_io_cache() a READ_CACHE of IO_SIZE*2 on the file; on failure
       log and set *errmsg.
    3. check_binlog_magic() on the new cache.
  The my_close() call belongs to a cleanup path.
  NOTE(review): the labels, returns and remaining cleanup are not visible
  in this view.
*/
File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
1090
if ((file = my_open(log_file_name, O_RDONLY | O_BINARY | O_SHARE,
1093
sql_print_error("Failed to open log (file '%s', errno %d)",
1094
log_file_name, my_errno);
1095
*errmsg = "Could not open log file";
1098
if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
1099
MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
1101
sql_print_error("Failed to create a cache on log (file '%s')",
1103
*errmsg = "Could not open log file";
1106
if (check_binlog_magic(log,errmsg))
1113
my_close(file,MYF(0));
1121
Find a unique filename for 'filename.#'.
1123
Set '#' to a number as low as possible.
1126
nonzero if not possible to get unique filename
1129
/*
  Pick the next free numeric suffix for 'name' and write it into 'name'.

  The directory part of 'name' is extracted into buff with dirname_part()
  and 'start' points at the file-name part. The directory is listed with
  my_dir(); every entry whose first 'length' bytes equal those at 'start'
  and whose remainder is accepted by test_if_number() contributes its
  number to max_found. The suffix written at 'end' is max_found+1,
  formatted as "%06ld".

  If the directory cannot be read, ".1" is written at 'end' instead.

  NOTE(review): 'end', 'max_found', 'number' and 'i' are declared or
  assigned on lines not visible in this view. set_if_bigger() casts to
  ulong, so max_found is presumably unsigned long while the format is the
  signed "%06ld" — confirm. The size of the destination behind 'end' is
  not checked here.
*/
static int find_uniq_filename(char *name)
1133
char buff[FN_REFLEN];
1134
struct st_my_dir *dir_info;
1135
register struct fileinfo *file_info;
1137
size_t buf_length, length;
1140
length= dirname_part(buff, name, &buf_length);
1141
start= name + length;
1145
length= (size_t) (end-start+1);
1147
if (!(dir_info = my_dir(buff,MYF(MY_DONT_SORT))))
1148
{ // This shouldn't happen
1149
strmov(end,".1"); // use name+1
1152
file_info= dir_info->dir_entry;
1153
for (i=dir_info->number_off_files ; i-- ; file_info++)
1155
if (memcmp((uchar*) file_info->name, (uchar*) start, length) == 0 &&
1156
test_if_number(file_info->name+length, &number,0))
1158
set_if_bigger(max_found,(ulong) number);
1161
my_dirend(dir_info);
1164
sprintf(end,"%06ld",max_found+1);
1169
/*
  Record the log type and the IO_CACHE type in the log_type and
  io_cache_type members. open() calls this before opening the file.
*/
void MYSQL_LOG::init(enum_log_type log_type_arg,
1170
enum cache_type io_cache_type_arg)
1172
log_type= log_type_arg;
1173
io_cache_type= io_cache_type_arg;
1179
Open a (new) log file.
1184
log_name The name of the log to open
1185
log_type_arg The type of the log. E.g. LOG_NORMAL
1186
new_name The new name for the logfile. This is only needed
1187
when the method is used to open the binlog file.
1188
io_cache_type_arg The type of the IO_CACHE to use for this log file
1191
Open the logfile, init IO_CACHE and write startup messages
1192
(in case of general and slow query logs).
1199
bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
1200
const char *new_name, enum cache_type io_cache_type_arg)
1202
char buff[FN_REFLEN];
1204
int open_flags= O_CREAT | O_BINARY;
1208
init(log_type_arg, io_cache_type_arg);
1210
if (!(name= my_strdup(log_name, MYF(MY_WME))))
1212
name= (char *)log_name; // for the error message
1217
strmov(log_file_name, new_name);
1218
else if (generate_new_name(log_file_name, name))
1221
if (io_cache_type == SEQ_READ_APPEND)
1222
open_flags |= O_RDWR | O_APPEND;
1224
open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
1228
if ((file= my_open(log_file_name, open_flags,
1229
MYF(MY_WME | ME_WAITTANG))) < 0 ||
1230
init_io_cache(&log_file, file, IO_SIZE, io_cache_type,
1231
my_tell(file, MYF(MY_WME)), 0,
1232
MYF(MY_WME | MY_NABP |
1233
((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
1236
if (log_type == LOG_NORMAL)
1239
int len=snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
1240
"started with:\nTCP Port: %d, Named Pipe: %s\n",
1241
my_progname, server_version, MYSQL_COMPILATION_COMMENT,
1244
end= strnmov(buff + len, "Time Id Command Argument\n",
1245
sizeof(buff) - len);
1246
if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) ||
1247
flush_io_cache(&log_file))
1251
log_state= LOG_OPENED;
1255
sql_print_error("Could not use %s for logging (error %d). \
1256
Turning logging off for the whole duration of the MySQL server process. \
1257
To turn it on again: fix the cause, \
1258
shutdown the MySQL server and restart it.", name, errno);
1260
my_close(file, MYF(0));
1261
end_io_cache(&log_file);
1263
log_state= LOG_CLOSED;
1267
MYSQL_LOG::MYSQL_LOG()
1268
: name(0), write_error(false), inited(false), log_type(LOG_UNKNOWN),
1269
log_state(LOG_CLOSED)
1272
We don't want to initialize LOCK_Log here as such initialization depends on
1273
safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
1274
called only in main(). Doing initialization here would make it happen
1277
memset((char*) &log_file, 0, sizeof(log_file));
1280
/*
  Initialize the LOCK_log mutex (MY_MUTEX_INIT_SLOW attributes).

  Asserts that 'inited' is still 0, i.e. that this has not run before.
  NOTE(review): the line that sets 'inited' is not visible in this view.
*/
void MYSQL_LOG::init_pthread_objects()
1282
assert(inited == 0);
1284
(void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
1292
exiting Bitmask. For the slow and general logs the only used bit is
1293
LOG_CLOSE_TO_BE_OPENED. This is used if we intend to call
1294
open at once after close.
1297
One can do an open on the object at once after doing a close.
1298
The internal structures are not freed until cleanup() is called
1301
/*
  Close the log file and record the resulting state.

  For a log in state LOG_OPENED the IO cache is ended, then the file is
  synced with my_sync() and closed with my_close(). A failure of either
  call is reported with sql_print_error(ER_ERROR_ON_WRITE) only if
  write_error is not already set.

  The final state is LOG_TO_BE_OPENED when 'exiting' contains the
  LOG_CLOSE_TO_BE_OPENED bit, LOG_CLOSED otherwise.

  NOTE(review): brace lines are not visible in this view; the extent of
  the LOG_OPENED branch is presumed to cover the end/sync/close sequence.
*/
void MYSQL_LOG::close(uint exiting)
1302
{ // One can't set log_type here!
1303
if (log_state == LOG_OPENED)
1305
end_io_cache(&log_file);
1307
if (my_sync(log_file.file, MYF(MY_WME)) && ! write_error)
1310
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1313
if (my_close(log_file.file, MYF(MY_WME)) && ! write_error)
1316
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1320
log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
1325
/** This is called only once. */
1327
void MYSQL_LOG::cleanup()
1332
(void) pthread_mutex_destroy(&LOCK_log);
1339
/*
  Produce the file name to use for this log in 'new_name'.

  log_name is formatted into new_name with fn_format(), using
  mysql_data_home as the directory argument. For a binary log (LOG_BIN)
  whose log_name has no extension (fn_ext() yields an empty string),
  find_uniq_filename() is asked to extend new_name; if that fails
  (non-zero result), ER_NO_UNIQUE_LOGFILE is logged.
  NOTE(review): the return statements are not visible in this view.
*/
int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name)
1341
fn_format(new_name, log_name, mysql_data_home, "", 4);
1342
if (log_type == LOG_BIN)
1344
if (!fn_ext(log_name)[0])
1346
if (find_uniq_filename(new_name))
1348
sql_print_error(ER(ER_NO_UNIQUE_LOGFILE), log_name);
1364
Reopen the log file. The method is used during FLUSH LOGS
1365
and locks LOCK_log mutex
1369
/*
  Close and immediately reopen the log file while holding LOCK_log.

  'name' is cleared before close() so that close() does not free it, and
  close() is called with LOG_CLOSE_TO_BE_OPENED. The log is then reopened
  through open() with save_name and the current log_type and
  io_cache_type, after which save_name is freed.
  NOTE(review): save_name is assigned on a line not visible in this view —
  presumably the previous value of 'name'; confirm.
*/
void MYSQL_QUERY_LOG::reopen_file()
1378
pthread_mutex_lock(&LOCK_log);
1381
name= 0; // Don't free name
1382
close(LOG_CLOSE_TO_BE_OPENED);
1385
Note that at this point, log_state != LOG_CLOSED (important for is_open()).
1388
open(save_name, log_type, 0, io_cache_type);
1389
my_free(save_name, MYF(0));
1391
pthread_mutex_unlock(&LOCK_log);
1398
Write a command to traditional general log file
1403
event_time command start timestamp
1404
user_host the pointer to the string with user@host info
1405
user_host_len length of the user_host string. this is computed once
1406
and passed to all general log event handlers
1407
thread_id Id of the thread, issued a query
1408
command_type the type of the command being logged
1409
command_type_len the length of the string above
1410
sql_text the very text of the query being executed
1411
sql_text_len the length of sql_text string
1415
Log given command to normal (not rotatable) log file
1419
TRUE - error occurred
1422
bool MYSQL_QUERY_LOG::write(time_t event_time,
1423
const char *user_host __attribute__((unused)),
1424
uint user_host_len __attribute__((unused)),
1426
const char *command_type, uint command_type_len,
1427
const char *sql_text, uint sql_text_len)
1431
char local_time_buff[MAX_TIME_SIZE];
1433
uint time_buff_len= 0;
1435
(void) pthread_mutex_lock(&LOCK_log);
1437
/* Test if someone closed between the is_open test and lock */
1440
/* Note that my_b_write() assumes it knows the length for this */
1441
if (event_time != last_time)
1443
last_time= event_time;
1445
localtime_r(&event_time, &start);
1447
time_buff_len= snprintf(local_time_buff, MAX_TIME_SIZE,
1448
"%02d%02d%02d %2d:%02d:%02d",
1449
start.tm_year % 100, start.tm_mon + 1,
1450
start.tm_mday, start.tm_hour,
1451
start.tm_min, start.tm_sec);
1453
if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
1457
/* Any non-zero result from my_b_write() is treated as a write failure. */
if (my_b_write(&log_file, (uchar*) "\t\t" ,2))
1460
/* command_type, thread_id */
1461
length= snprintf(buff, 32, "%5ld ", (long) thread_id);
1463
if (my_b_write(&log_file, (uchar*) buff, length))
1466
if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
1469
if (my_b_write(&log_file, (uchar*) "\t", 1))
1473
if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
1476
if (my_b_write(&log_file, (uchar*) "\n", 1) ||
1477
flush_io_cache(&log_file))
1481
(void) pthread_mutex_unlock(&LOCK_log);
1488
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1490
(void) pthread_mutex_unlock(&LOCK_log);
1496
Log a query to the traditional slow log file
1501
thd THD of the query
1502
current_time current timestamp
1503
query_start_arg command start timestamp
1504
user_host the pointer to the string with user@host info
1505
user_host_len length of the user_host string. this is computed once
1506
and passed to all general log event handlers
1507
query_utime Amount of time the query took to execute (in microseconds)
1508
lock_utime Amount of time the query was locked (in microseconds)
1509
is_command The flag, which determines, whether the sql_text is a
1510
query or an administrator command.
1511
sql_text the very text of the query or administrator command
1513
sql_text_len the length of sql_text string
1517
Log a query to the slow log file.
1521
TRUE - error occurred
1524
bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
1525
time_t query_start_arg __attribute__((unused)),
1526
const char *user_host,
1527
uint user_host_len, uint64_t query_utime,
1528
uint64_t lock_utime, bool is_command,
1529
const char *sql_text, uint sql_text_len)
1533
(void) pthread_mutex_lock(&LOCK_log);
1537
(void) pthread_mutex_unlock(&LOCK_log);
1542
{ // Safety agains reopen
1544
char buff[80], *end;
1545
char query_time_buff[22+7], lock_time_buff[22+7];
1549
if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
1551
if (current_time != last_time)
1553
last_time= current_time;
1555
/* Break current_time into local calendar fields for the "# Time:" header. */
localtime_r(&current_time, &start);
1557
buff_len= snprintf(buff, sizeof buff,
1558
"# Time: %02d%02d%02d %2d:%02d:%02d\n",
1559
start.tm_year % 100, start.tm_mon + 1,
1560
start.tm_mday, start.tm_hour,
1561
start.tm_min, start.tm_sec);
1563
/* Note that my_b_write() assumes it knows the length for this */
1564
if (my_b_write(&log_file, (uchar*) buff, buff_len))
1567
const uchar uh[]= "# User@Host: ";
1568
if (my_b_write(&log_file, uh, sizeof(uh) - 1))
1570
if (my_b_write(&log_file, (uchar*) user_host, user_host_len))
1572
if (my_b_write(&log_file, (uchar*) "\n", 1))
1575
/* For slow query log */
1576
sprintf(query_time_buff, "%.6f", uint64_t2double(query_utime)/1000000.0);
1577
sprintf(lock_time_buff, "%.6f", uint64_t2double(lock_utime)/1000000.0);
1578
if (my_b_printf(&log_file,
1579
"# Query_time: %s Lock_time: %s"
1580
" Rows_sent: %lu Rows_examined: %lu\n",
1581
query_time_buff, lock_time_buff,
1582
(ulong) thd->sent_row_count,
1583
(ulong) thd->examined_row_count) == (uint) -1)
1585
if (thd->db && strcmp(thd->db, db))
1586
{ // Database changed
1587
if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1)
1591
if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
1593
end=strmov(end, ",last_insert_id=");
1594
end=int64_t10_to_str((int64_t)
1595
thd->first_successful_insert_id_in_prev_stmt_for_binlog,
1598
// Save value if we do an insert.
1599
if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
1601
if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
1603
end=strmov(end,",insert_id=");
1604
end=int64_t10_to_str((int64_t)
1605
thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
1611
This info used to show up randomly, depending on whether the query
1612
checked the query start time or not. now we always write current
1613
timestamp to the slow log
1615
end= strmov(end, ",timestamp=");
1616
end= int10_to_str((long) current_time, end, 10);
1622
if (my_b_write(&log_file, (uchar*) "SET ", 4) ||
1623
my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff)))
1628
end= strxmov(buff, "# administrator command: ", NullS);
1629
buff_len= (ulong) (end - buff);
1630
my_b_write(&log_file, (uchar*) buff, buff_len);
1632
if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) ||
1633
my_b_write(&log_file, (uchar*) ";\n",2) ||
1634
flush_io_cache(&log_file))
1642
sql_print_error(ER(ER_ERROR_ON_WRITE), name, error);
1646
(void) pthread_mutex_unlock(&LOCK_log);
1653
The following should be using fn_format(); We just need to
1654
first change fn_format() to cut the file name if it's too long.
1656
const char *MYSQL_LOG::generate_name(const char *log_name,
1658
bool strip_ext, char *buff)
1660
if (!log_name || !log_name[0])
1662
strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
1663
return (const char *)
1664
fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
1666
// get rid of extension if the log is binary to avoid problems
1669
char *p= fn_ext(log_name);
1670
uint length= (uint) (p - log_name);
1671
/*
  strmake() writes a terminating NUL after the copied bytes, so the copy
  length is capped at FN_REFLEN-1 (assumes buff holds FN_REFLEN bytes, as
  the bound used here implies).
*/
strmake(buff, log_name, min(length, FN_REFLEN-1));
1672
return (const char*)buff;
1679
MYSQL_BIN_LOG::MYSQL_BIN_LOG()
1680
:bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
1681
need_start_event(true), m_table_map_version(0),
1682
description_event_for_exec(0), description_event_for_queue(0)
1685
We don't want to initialize locks here as such initialization depends on
1686
safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
1687
called only in main(). Doing initialization here would make it happen
1690
index_file_name[0] = 0;
1691
memset((char*) &index_file, 0, sizeof(index_file));
1694
/* this is called only once */
1696
void MYSQL_BIN_LOG::cleanup()
1701
close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
1702
delete description_event_for_queue;
1703
delete description_event_for_exec;
1704
(void) pthread_mutex_destroy(&LOCK_log);
1705
(void) pthread_mutex_destroy(&LOCK_index);
1706
(void) pthread_cond_destroy(&update_cond);
1712
/* Init binlog-specific vars */
1713
void MYSQL_BIN_LOG::init(bool no_auto_events_arg, ulong max_size_arg)
1715
no_auto_events= no_auto_events_arg;
1716
max_size= max_size_arg;
1721
void MYSQL_BIN_LOG::init_pthread_objects()
1723
assert(inited == 0);
1725
(void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
1726
(void) pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW);
1727
(void) pthread_cond_init(&update_cond, 0);
1731
bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
1732
const char *log_name)
1734
File index_file_nr= -1;
1735
assert(!my_b_inited(&index_file));
1738
First open of this class instance
1739
Create an index file that will hold all file names used for logging.
1740
Add new entries to the end of it.
1742
myf opt= MY_UNPACK_FILENAME;
1743
if (!index_file_name_arg)
1745
index_file_name_arg= log_name; // Use same basename for index file
1746
opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
1748
fn_format(index_file_name, index_file_name_arg, mysql_data_home,
1750
if ((index_file_nr= my_open(index_file_name,
1751
O_RDWR | O_CREAT | O_BINARY ,
1752
MYF(MY_WME))) < 0 ||
1753
my_sync(index_file_nr, MYF(MY_WME)) ||
1754
init_io_cache(&index_file, index_file_nr,
1755
IO_SIZE, WRITE_CACHE,
1756
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
1757
0, MYF(MY_WME | MY_WAIT_IF_FULL)))
1760
TODO: all operations creating/deleting the index file or a log, should
1761
call my_sync_dir() or my_sync_dir_by_file() to be durable.
1762
TODO: file creation should be done with my_create() not my_open().
1764
if (index_file_nr >= 0)
1765
my_close(index_file_nr,MYF(0));
1773
Open a (new) binlog file.
1775
- Open the log file and the index file. Register the new
1777
- When calling this while the file is in use, you must hold the locks
1778
on LOCK_log and LOCK_index.
1786
bool MYSQL_BIN_LOG::open(const char *log_name,
1787
enum_log_type log_type_arg,
1788
const char *new_name,
1789
enum cache_type io_cache_type_arg,
1790
bool no_auto_events_arg,
1792
bool null_created_arg)
1798
/* open the main log file */
1799
if (MYSQL_LOG::open(log_name, log_type_arg, new_name, io_cache_type_arg))
1800
return(1); /* all warnings issued */
1802
init(no_auto_events_arg, max_size_arg);
1806
assert(log_type == LOG_BIN);
1809
bool write_file_name_to_index_file=0;
1811
if (!my_b_filelength(&log_file))
1814
The binary log file was empty (probably newly created)
1815
This is the normal case and happens when the user doesn't specify
1816
an extension for the binary log files.
1817
In this case we write a standard header to it.
1819
if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
1820
BIN_LOG_HEADER_SIZE))
1822
bytes_written+= BIN_LOG_HEADER_SIZE;
1823
write_file_name_to_index_file= 1;
1826
assert(my_b_inited(&index_file) != 0);
1827
reinit_io_cache(&index_file, WRITE_CACHE,
1828
my_b_filelength(&index_file), 0, 0);
1829
if (need_start_event && !no_auto_events)
1832
In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event
1833
even if this is not the very first binlog.
1835
Format_description_log_event s(BINLOG_VERSION);
1837
don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
1838
as we won't be able to reset it later
1840
if (io_cache_type == WRITE_CACHE)
1841
s.flags|= LOG_EVENT_BINLOG_IN_USE_F;
1844
s.dont_set_created= null_created_arg;
1845
if (s.write(&log_file))
1847
bytes_written+= s.data_written;
1849
if (description_event_for_queue &&
1850
description_event_for_queue->binlog_version>=4)
1853
This is a relay log written to by the I/O slave thread.
1854
Write the event so that others can later know the format of this relay
1856
Note that this event is very close to the original event from the
1857
master (it has binlog version of the master, event types of the
1858
master), so this is suitable to parse the next relay log's event. It
1859
has been produced by
1860
Format_description_log_event::Format_description_log_event(char* buf,).
1861
Why don't we want to write the description_event_for_queue if this
1862
event is for format<4 (3.23 or 4.x): this is because in that case, the
1863
description_event_for_queue describes the data received from the
1864
master, but not the data written to the relay log (*conversion*),
1865
which is in format 4 (slave's).
1868
Set 'created' to 0, so that in next relay logs this event does not
1869
trigger cleaning actions on the slave in
1870
Format_description_log_event::apply_event_impl().
1872
description_event_for_queue->created= 0;
1873
/* Don't set log_pos in event header */
1874
description_event_for_queue->artificial_event=1;
1876
if (description_event_for_queue->write(&log_file))
1878
bytes_written+= description_event_for_queue->data_written;
1880
if (flush_io_cache(&log_file) ||
1881
my_sync(log_file.file, MYF(MY_WME)))
1884
if (write_file_name_to_index_file)
1887
As this is a new log file, we write the file name to the index
1888
file. As with every write to the index file, we sync it.
1890
if (my_b_write(&index_file, (uchar*) log_file_name,
1891
strlen(log_file_name)) ||
1892
my_b_write(&index_file, (uchar*) "\n", 1) ||
1893
flush_io_cache(&index_file) ||
1894
my_sync(index_file.file, MYF(MY_WME)))
1898
log_state= LOG_OPENED;
1903
sql_print_error("Could not use %s for logging (error %d). \
1904
Turning logging off for the whole duration of the MySQL server process. \
1905
To turn it on again: fix the cause, \
1906
shutdown the MySQL server and restart it.", name, errno);
1908
my_close(file,MYF(0));
1909
end_io_cache(&log_file);
1910
end_io_cache(&index_file);
1912
log_state= LOG_CLOSED;
1917
int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
1919
pthread_mutex_lock(&LOCK_log);
1920
int ret = raw_get_current_log(linfo);
1921
pthread_mutex_unlock(&LOCK_log);
1925
int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
1927
strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
1928
linfo->pos = my_b_tell(&log_file);
1933
Move all data up in a file-name index file.
1935
We do the copy outside of the IO_CACHE as the cache buffers would just
1936
make things slower and more complicated.
1937
In most cases the copy loop should only do one read.
1939
@param index_file File to move
1940
@param offset Move everything from here to beginning
1943
File will be truncated to be 'offset' shorter or filled up with newlines
1949
#ifdef HAVE_REPLICATION
1951
static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
1954
my_off_t init_offset= offset;
1955
File file= index_file->file;
1956
uchar io_buf[IO_SIZE*2];
1958
for (;; offset+= bytes_read)
1960
(void) my_seek(file, offset, MY_SEEK_SET, MYF(0));
1961
if ((bytes_read= (int) my_read(file, io_buf, sizeof(io_buf), MYF(MY_WME)))
1965
break; // end of file
1966
(void) my_seek(file, offset-init_offset, MY_SEEK_SET, MYF(0));
1967
if (my_write(file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
1970
/* The following will either truncate the file or fill the end with '\n' */
1971
if (ftruncate(file, offset - init_offset) || my_sync(file, MYF(MY_WME)))
1974
/* Reset data in old index cache */
1975
reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
1982
#endif /* HAVE_REPLICATION */
1985
Find the position in the log-index-file for the given log name.
1987
@param linfo Store here the found log file name and position to
1988
the NEXT log file name in the index file.
1989
@param log_name Filename to find in the index file.
1990
Is a null pointer if we want to read the first entry
1991
@param need_lock Set this to 1 if the parent doesn't already have a
1995
On systems without the truncate function the file will end with one or
1996
more empty lines. These will be ignored when reading the file.
2001
LOG_INFO_EOF End of log-index-file found
2003
LOG_INFO_IO Got IO error while reading file
2006
int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
2010
char *fname= linfo->log_file_name;
2011
uint log_name_len= log_name ? (uint) strlen(log_name) : 0;
2014
Mutex needed because we need to make sure the file pointer does not
2015
move from under our feet
2018
pthread_mutex_lock(&LOCK_index);
2019
safe_mutex_assert_owner(&LOCK_index);
2021
/* As the file is flushed, we can't get an error here */
2022
(void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
2027
my_off_t offset= my_b_tell(&index_file);
2028
/* If we get 0 or 1 characters, this is the end of the file */
2030
if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
2032
/* Did not find the given entry; Return not found or error */
2033
error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
2037
// if the log entry matches, null string matching anything
2039
(log_name_len == length-1 && fname[log_name_len] == '\n' &&
2040
!memcmp(fname, log_name, log_name_len)))
2042
fname[length-1]=0; // remove last \n
2043
linfo->index_file_start_offset= offset;
2044
linfo->index_file_offset = my_b_tell(&index_file);
2050
pthread_mutex_unlock(&LOCK_index);
2056
Find the next log file name in the log-index-file.
2059
linfo Store here the next log file name and position to
2060
the file name after that.
2062
need_lock Set this to 1 if the parent doesn't already have a
2066
- Before calling this function, one has to call find_log_pos()
2068
- Mutex needed because we need to make sure the file pointer does not move
2074
LOG_INFO_EOF End of log-index-file found
2076
LOG_INFO_IO Got IO error while reading file
2079
int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
2083
char *fname= linfo->log_file_name;
2086
pthread_mutex_lock(&LOCK_index);
2087
safe_mutex_assert_owner(&LOCK_index);
2089
/* As the file is flushed, we can't get an error here */
2090
(void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
2093
linfo->index_file_start_offset= linfo->index_file_offset;
2094
if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
2096
error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
2099
fname[length-1]=0; // kill \n
2100
linfo->index_file_offset = my_b_tell(&index_file);
2104
pthread_mutex_unlock(&LOCK_index);
2110
Delete all logs referred to in the index file.
2111
Start writing to a new log file.
2113
The new index file will only contain this file.
2118
If not called from slave thread, write start event to new log
2126
bool MYSQL_BIN_LOG::reset_logs(THD* thd)
2130
const char* save_name;
2134
We need to get both locks to be sure that no one is trying to
2135
write to the index log file.
2137
pthread_mutex_lock(&LOCK_log);
2138
pthread_mutex_lock(&LOCK_index);
2141
The following mutex is needed to ensure that no threads call
2142
'delete thd' as we would then risk missing a 'rollback' from this
2143
thread. If the transaction involved MyISAM tables, it should go
2144
into binlog even on rollback.
2146
VOID(pthread_mutex_lock(&LOCK_thread_count));
2148
/* Save variables so that we can reopen the log */
2150
name=0; // Protect against free
2151
close(LOG_CLOSE_TO_BE_OPENED);
2153
/* First delete all old log files */
2155
if (find_log_pos(&linfo, NullS, 0))
2163
if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
2165
if (my_errno == ENOENT)
2167
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2168
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2169
linfo.log_file_name);
2170
sql_print_information("Failed to delete file '%s'",
2171
linfo.log_file_name);
2177
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2178
ER_BINLOG_PURGE_FATAL_ERR,
2179
"a problem with deleting %s; "
2180
"consider examining correspondence "
2181
"of your binlog index file "
2182
"to the actual binlog files",
2183
linfo.log_file_name);
2188
if (find_next_log(&linfo, 0))
2192
/* Start logging with a new file */
2193
close(LOG_CLOSE_INDEX);
2194
if ((error= my_delete_allow_opened(index_file_name, MYF(0)))) // Reset (open will update)
2196
if (my_errno == ENOENT)
2198
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2199
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2201
sql_print_information("Failed to delete file '%s'",
2208
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2209
ER_BINLOG_PURGE_FATAL_ERR,
2210
"a problem with deleting %s; "
2211
"consider examining correspondence "
2212
"of your binlog index file "
2213
"to the actual binlog files",
2219
if (!thd->slave_thread)
2221
if (!open_index_file(index_file_name, 0))
2222
open(save_name, log_type, 0, io_cache_type, no_auto_events, max_size, 0);
2223
my_free((uchar*) save_name, MYF(0));
2226
VOID(pthread_mutex_unlock(&LOCK_thread_count));
2227
pthread_mutex_unlock(&LOCK_index);
2228
pthread_mutex_unlock(&LOCK_log);
2234
Delete relay log files prior to rli->group_relay_log_name
2235
(i.e. all logs which are not involved in a non-finished group
2236
(transaction)), remove them from the index file and start on next
2240
- Protects index file with LOCK_index
2241
- Delete relevant relay log files
2242
- Copy all file names after these ones to the front of the index file
2243
- If the OS has truncate, truncate the file, else fill it with '\n'
2244
- Read the next file name from the index file and store in rli->linfo
2246
@param rli Relay log information
2247
@param included If false, all relay logs that are strictly before
2248
rli->group_relay_log_name are deleted ; if true, the
2249
latter is deleted too (i.e. all relay logs
2250
read by the SQL slave thread are deleted).
2253
- This is only called from the slave-execute thread when it has read
2254
all commands from a relay log and wants to switch to a new relay log.
2255
- When this happens, we can be in an active transaction as
2256
a transaction can span over two relay logs
2257
(although it is always written as a single block to the master's binary
2258
log, hence cannot span over two master's binary logs).
2263
LOG_INFO_EOF End of log-index-file found
2265
LOG_INFO_SEEK Could not allocate IO cache
2267
LOG_INFO_IO Got IO error while reading file
2270
#ifdef HAVE_REPLICATION
2272
int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
2277
assert(rli->slave_running == 1);
2278
assert(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
2280
pthread_mutex_lock(&LOCK_index);
2281
pthread_mutex_lock(&rli->log_space_lock);
2282
rli->relay_log.purge_logs(rli->group_relay_log_name, included,
2283
0, 0, &rli->log_space_total);
2284
// Tell the I/O thread to take the relay_log_space_limit into account
2285
rli->ignore_log_space_limit= 0;
2286
pthread_mutex_unlock(&rli->log_space_lock);
2289
Ok to broadcast after the critical region as there is no risk of
2290
the mutex being destroyed by this thread later - this helps save
2293
pthread_cond_broadcast(&rli->log_space_cond);
2296
Read the next log file name from the index file and pass it back to
2298
If included is true, we want the first relay log;
2299
otherwise we want the one after event_relay_log_name.
2301
if ((included && (error=find_log_pos(&rli->linfo, NullS, 0))) ||
2303
((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
2304
(error=find_next_log(&rli->linfo, 0)))))
2307
sql_print_error("next log error: %d offset: %s log: %s included: %d",
2309
llstr(rli->linfo.index_file_offset,buff),
2310
rli->group_relay_log_name,
2316
Reset rli's coordinates to the current log.
2318
rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
2319
strmake(rli->event_relay_log_name,rli->linfo.log_file_name,
2320
sizeof(rli->event_relay_log_name)-1);
2323
If we removed the rli->group_relay_log_name file,
2324
we must update the rli->group* coordinates, otherwise do not touch it as the
2325
group's execution is not finished (e.g. COMMIT not executed)
2329
rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
2330
strmake(rli->group_relay_log_name,rli->linfo.log_file_name,
2331
sizeof(rli->group_relay_log_name)-1);
2332
rli->notify_group_relay_log_name_update();
2335
/* Store where we are in the new file for the execution thread */
2336
flush_relay_log_info(rli);
2339
pthread_mutex_unlock(&LOCK_index);
2344
Update log index_file.
2347
int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
2349
if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
2352
// now update offsets in index file for running threads
2353
if (need_update_threads)
2354
adjust_linfo_offsets(log_info->index_file_start_offset);
2359
Remove all logs before the given log from disk and from the index file.
2361
@param to_log Delete all log files before this file.
2362
@param included If true, to_log is deleted too.
2364
@param need_update_threads If we want to update the log coordinates of
2365
all threads. False for relay logs, true otherwise.
2366
@param decrease_log_space If not null, decrement this variable by
2367
the amount of log space freed
2370
If any of the logs before the deleted one is in use,
2371
only purge logs up to this one.
2376
LOG_INFO_EOF to_log not found
2377
LOG_INFO_EMFILE too many files opened
2378
LOG_INFO_FATAL if any other than ENOENT error from
2379
stat() or my_delete()
2382
int MYSQL_BIN_LOG::purge_logs(const char *to_log,
2385
bool need_update_threads,
2386
uint64_t *decrease_log_space)
2394
pthread_mutex_lock(&LOCK_index);
2395
if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
2399
File name exists in index file; delete until we find this file
2400
or a file that is used.
2402
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
2404
while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
2405
!log_in_use(log_info.log_file_name))
2408
if (stat(log_info.log_file_name, &s))
2410
if (errno == ENOENT)
2413
It's not fatal if we can't stat a log file that does not exist;
2414
If we could not stat, we won't delete.
2416
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2417
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2418
log_info.log_file_name);
2419
sql_print_information("Failed to execute stat on file '%s'",
2420
log_info.log_file_name);
2426
Other than ENOENT are fatal
2428
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2429
ER_BINLOG_PURGE_FATAL_ERR,
2430
"a problem with getting info on being purged %s; "
2431
"consider examining correspondence "
2432
"of your binlog index file "
2433
"to the actual binlog files",
2434
log_info.log_file_name);
2435
error= LOG_INFO_FATAL;
2441
if (!my_delete(log_info.log_file_name, MYF(0)))
2443
if (decrease_log_space)
2444
*decrease_log_space-= s.st_size;
2448
if (my_errno == ENOENT)
2450
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2451
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2452
log_info.log_file_name);
2453
sql_print_information("Failed to delete file '%s'",
2454
log_info.log_file_name);
2459
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2460
ER_BINLOG_PURGE_FATAL_ERR,
2461
"a problem with deleting %s; "
2462
"consider examining correspondence "
2463
"of your binlog index file "
2464
"to the actual binlog files",
2465
log_info.log_file_name);
2466
if (my_errno == EMFILE)
2468
error= LOG_INFO_EMFILE;
2470
error= LOG_INFO_FATAL;
2476
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
2478
if (find_next_log(&log_info, 0) || exit_loop)
2483
If we get killed -9 here, the sysadmin would have to edit
2484
the log index file after restart - otherwise, this should be safe
2486
error= update_log_index(&log_info, need_update_threads);
2493
pthread_mutex_unlock(&LOCK_index);
2498
Remove all logs before the given file date from disk and from the
2501
@param thd Thread pointer
2502
@param purge_time Delete all log files before the given date.
2505
If any of the logs before the deleted one is in use,
2506
only purge logs up to this one.
2511
LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
2512
LOG_INFO_FATAL if any other than ENOENT error from
2513
stat() or my_delete()
2516
int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
2520
struct stat stat_area;
2522
pthread_mutex_lock(&LOCK_index);
2525
Delete until we find the current file
2526
or a file that is used or a file
2527
that is older than purge_time.
2529
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
2532
while (strcmp(log_file_name, log_info.log_file_name) &&
2533
!log_in_use(log_info.log_file_name))
2535
if (stat(log_info.log_file_name, &stat_area))
2537
if (errno == ENOENT)
2540
It's not fatal if we can't stat a log file that does not exist.
2542
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2543
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2544
log_info.log_file_name);
2545
sql_print_information("Failed to execute stat on file '%s'",
2546
log_info.log_file_name);
2552
Other than ENOENT are fatal
2554
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2555
ER_BINLOG_PURGE_FATAL_ERR,
2556
"a problem with getting info on being purged %s; "
2557
"consider examining correspondence "
2558
"of your binlog index file "
2559
"to the actual binlog files",
2560
log_info.log_file_name);
2561
error= LOG_INFO_FATAL;
2567
if (stat_area.st_mtime >= purge_time)
2569
if (my_delete(log_info.log_file_name, MYF(0)))
2571
if (my_errno == ENOENT)
2573
/* It's not fatal even if we can't delete a log file */
2574
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2575
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2576
log_info.log_file_name);
2577
sql_print_information("Failed to delete file '%s'",
2578
log_info.log_file_name);
2583
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2584
ER_BINLOG_PURGE_FATAL_ERR,
2585
"a problem with deleting %s; "
2586
"consider examining correspondence "
2587
"of your binlog index file "
2588
"to the actual binlog files",
2589
log_info.log_file_name);
2590
error= LOG_INFO_FATAL;
2594
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
2596
if (find_next_log(&log_info, 0))
2601
If we get killed -9 here, the sysadmin would have to edit
2602
the log index file after restart - otherwise, this should be safe
2604
error= update_log_index(&log_info, 1);
2607
pthread_mutex_unlock(&LOCK_index);
2610
#endif /* HAVE_REPLICATION */
2614
Create a new log file name.
2616
@param buf buf of at least FN_REFLEN where new name is stored
2619
If the file name would be longer than FN_REFLEN it will be truncated
2622
void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
2624
uint dir_len = dirname_length(log_file_name);
2625
if (dir_len >= FN_REFLEN)
2626
dir_len=FN_REFLEN-1;
2627
strnmov(buf, log_file_name, dir_len);
2628
strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
2633
Check if we are writing/reading to the given log file.
2636
bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
2638
return !strcmp(log_file_name, log_file_name_arg);
2643
Wrappers around new_file_impl to avoid using argument
2644
to control locking. The argument 1) is less readable 2) breaks
2645
encapsulation 3) allows external access to the class without
2646
a lock (which is not possible with private new_file_without_locking
2650
void MYSQL_BIN_LOG::new_file()
2656
void MYSQL_BIN_LOG::new_file_without_locking()
2663
Start writing to a new log file or reopen the old file.
2665
@param need_lock Set to 1 if caller has not locked LOCK_log
2668
The new file name is stored last in the index file
2671
void MYSQL_BIN_LOG::new_file_impl(bool need_lock)
2673
char new_name[FN_REFLEN], *new_name_ptr, *old_name;
2681
pthread_mutex_lock(&LOCK_log);
2682
pthread_mutex_lock(&LOCK_index);
2684
safe_mutex_assert_owner(&LOCK_log);
2685
safe_mutex_assert_owner(&LOCK_index);
2688
if binlog is used as tc log, be sure all xids are "unlogged",
2689
so that on recover we only need to scan one - latest - binlog file
2690
for prepared xids. As this is expected to be a rare event,
2691
simple wait strategy is enough. We're locking LOCK_log to be sure no
2692
new Xid_log_event's are added to the log (and prepared_xids is not
2693
increased), and waiting on COND_prep_xids for late threads to
2698
tc_log_page_waits++;
2699
pthread_mutex_lock(&LOCK_prep_xids);
2700
while (prepared_xids) {
2701
pthread_cond_wait(&COND_prep_xids, &LOCK_prep_xids);
2703
pthread_mutex_unlock(&LOCK_prep_xids);
2706
/* Reuse old name if not binlog and not update log */
2710
If user hasn't specified an extension, generate a new log name
2711
We have to do this here and not in open as we want to store the
2712
new file name in the current binary log file.
2714
if (generate_new_name(new_name, name))
2716
new_name_ptr=new_name;
2718
if (log_type == LOG_BIN)
2720
if (!no_auto_events)
2723
We log the whole file name for log file as the user may decide
2724
to change base names at some point.
2726
Rotate_log_event r(new_name+dirname_length(new_name),
2727
0, LOG_EVENT_OFFSET, 0);
2729
bytes_written += r.data_written;
2732
Update needs to be signalled even if there is no rotate event
2733
log rotation should give the waiting thread a signal to
2734
discover EOF and move on to the next log.
2739
name=0; // Don't free name
2740
close(LOG_CLOSE_TO_BE_OPENED);
2743
Note that at this point, log_state != LOG_CLOSED (important for is_open()).
2747
new_file() is only used for rotation (in FLUSH LOGS or because size >
2748
max_binlog_size or max_relay_log_size).
2749
If this is a binary log, the Format_description_log_event at the beginning of
2750
the new file should have created=0 (to distinguish with the
2751
Format_description_log_event written at server startup, which should
2752
trigger temp tables deletion on slaves.
2755
open(old_name, log_type, new_name_ptr,
2756
io_cache_type, no_auto_events, max_size, 1);
2757
my_free(old_name,MYF(0));
2761
pthread_mutex_unlock(&LOCK_log);
2762
pthread_mutex_unlock(&LOCK_index);
2768
/*
  Append one event to the log while holding LOCK_log.

  The code asserts that log_file is a SEQ_READ_APPEND cache, writes ev
  through Log_event::write(), adds ev->data_written to bytes_written and
  calls new_file_without_locking() once my_b_append_tell() exceeds
  max_size. LOCK_log is released before signal_update() is called.

  NOTE(review): the append position is cast to uint before it is compared
  with max_size; presumably this relies on the log staying below 4GB -
  confirm.
*/
bool MYSQL_BIN_LOG::append(Log_event* ev)
2771
pthread_mutex_lock(&LOCK_log);
2773
assert(log_file.type == SEQ_READ_APPEND);
2775
Log_event::write() is smart enough to use my_b_write() or
2776
my_b_append() depending on the kind of cache we have.
2778
if (ev->write(&log_file))
2783
bytes_written+= ev->data_written;
2784
if ((uint) my_b_append_tell(&log_file) > max_size)
2785
new_file_without_locking();
2788
pthread_mutex_unlock(&LOCK_log);
2789
signal_update(); // Safe as we don't call close
2794
/*
  Append a variable-length list of (buffer, length) pairs to the log.

  The first pair is (buf, len); further pairs are read with va_arg as
  (const char*, uint) and the loop stops at the first NULL buffer or zero
  length. Each chunk goes through my_b_append() and its length is added to
  bytes_written. The caller must already own LOCK_log (checked with
  safe_mutex_assert_owner) and log_file must be a SEQ_READ_APPEND cache.
  After the loop a new file is started if the append position exceeds
  max_size.
*/
bool MYSQL_BIN_LOG::appendv(const char* buf, uint len,...)
2800
assert(log_file.type == SEQ_READ_APPEND);
2802
safe_mutex_assert_owner(&LOCK_log);
2805
if (my_b_append(&log_file,(uchar*) buf,len))
2810
bytes_written += len;
2811
} while ((buf=va_arg(args,const char*)) && (len=va_arg(args,uint)));
2812
if ((uint) my_b_append_tell(&log_file) > max_size)
2813
new_file_without_locking();
2822
/*
  Flush the log_file IO cache and sync the file every sync_binlog_period
  calls.

  The caller must own LOCK_log. sync_binlog_counter is incremented as part
  of the test; when it reaches sync_binlog_period (and the period is
  non-zero) the counter is reset and my_sync() is called on the file
  descriptor captured at entry. A period of zero never syncs.
*/
bool MYSQL_BIN_LOG::flush_and_sync()
2824
int err=0, fd=log_file.file;
2825
safe_mutex_assert_owner(&LOCK_log);
2826
if (flush_io_cache(&log_file))
2828
if (++sync_binlog_counter >= sync_binlog_period && sync_binlog_period)
2830
sync_binlog_counter= 0;
2831
err=my_sync(fd, MYF(MY_WME));
2836
/*
  Switch thd into "union events" mode.

  Asserts that the mode is not already active, sets do_union, clears both
  unioned_events flags and remembers query_id_param as first_query_id.
*/
void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
2838
assert(!thd->binlog_evt_union.do_union);
2839
thd->binlog_evt_union.do_union= true;
2840
thd->binlog_evt_union.unioned_events= false;
2841
thd->binlog_evt_union.unioned_events_trans= false;
2842
thd->binlog_evt_union.first_query_id= query_id_param;
2845
/*
  Leave "union events" mode for thd.

  Asserts that the mode is currently active and clears do_union; the other
  binlog_evt_union fields are left untouched.
*/
void MYSQL_BIN_LOG::stop_union_events(THD *thd)
2847
assert(thd->binlog_evt_union.do_union);
2848
thd->binlog_evt_union.do_union= false;
2851
/*
  Tell whether a query id belongs to the current union of events.

  True only while do_union is set and query_id_param is not smaller than
  the first_query_id recorded by start_union_events().
*/
bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
2853
return (thd->binlog_evt_union.do_union &&
2854
query_id_param >= thd->binlog_evt_union.first_query_id);
2859
These functions are placed in this file since they need access to
2860
binlog_hton, which has internal linkage.
2863
/*
  Make sure this THD has a binlog_trx_data attached for binlog_hton.

  The data is looked up with thd_get_ha_data(); the "Already set up" path
  returns 0. Otherwise a zero-filled binlog_trx_data is allocated and its
  trans_log cache is opened in mysql_tmpdir with binlog_cache_size. On the
  failure path the allocation is freed and 1 is returned. On success the
  pointer is stored with thd_set_ha_data() and the object is constructed
  in place with placement new.
*/
int THD::binlog_setup_trx_data()
2865
binlog_trx_data *trx_data=
2866
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2869
return(0); // Already set up
2871
trx_data= (binlog_trx_data*) my_malloc(sizeof(binlog_trx_data), MYF(MY_ZEROFILL));
2873
open_cached_file(&trx_data->trans_log, mysql_tmpdir,
2874
LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
2876
my_free((uchar*)trx_data, MYF(MY_ALLOW_ZERO_PTR));
2877
return(1); // Didn't manage to set it up
2879
thd_set_ha_data(this, binlog_hton, trx_data);
2881
trx_data= new (thd_get_ha_data(this, binlog_hton)) binlog_trx_data;
2887
Function to start a statement and optionally a transaction for the
2891
binlog_start_trans_and_stmt()
2895
This function does three things:
2896
- Start a transaction if not in autocommit mode or if a BEGIN
2897
statement has been seen.
2899
- Start a statement transaction to allow us to truncate the binary
2902
- Save the current binlog position so that we can roll back the
2903
statement by truncating the transaction log.
2905
We only update the saved position if the old one was undefined,
2906
the reason is that there are some cases (e.g., for CREATE-SELECT)
2907
where the position is saved twice (e.g., both in
2908
select_create::prepare() and THD::binlog_write_table_map()) , but
2909
we should use the first. This means that calls to this function
2910
can be used to start the statement before the first table map
2911
event, to include some extra events.
2915
/*
  Register the binlog handlerton for the current statement and, when
  OPTION_NOT_AUTOCOMMIT or OPTION_BEGIN is set in options, for the
  transaction as well.

  The work is preceded by a test on trx_data being NULL or before_stmt_pos
  being MY_OFF_T_UNDEF. binlog_set_stmt_begin() is called first and the
  statement-level ha_info entry of the binlog slot is marked read/write
  last.
*/
THD::binlog_start_trans_and_stmt()
2917
binlog_trx_data *trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2919
if (trx_data == NULL ||
2920
trx_data->before_stmt_pos == MY_OFF_T_UNDEF)
2922
this->binlog_set_stmt_begin();
2923
if (options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
2924
trans_register_ha(this, true, binlog_hton);
2925
trans_register_ha(this, false, binlog_hton);
2927
Mark statement transaction as read/write. We never start
2928
a binary log transaction and keep it read-only,
2929
therefore it's best to mark the transaction read/write just
2930
at the same time we start it.
2931
Not necessary to mark the normal transaction read/write
2932
since the statement-level flag will be propagated automatically
2933
inside ha_commit_trans.
2935
ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
2940
/*
  Record the current transaction-log position as before_stmt_pos.

  The position is obtained through binlog_trans_log_savepos() into a local
  variable; trx_data is fetched again afterwards and the position is then
  stored in trx_data->before_stmt_pos.
*/
void THD::binlog_set_stmt_begin() {
2941
binlog_trx_data *trx_data=
2942
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2945
The call to binlog_trans_log_savepos() might create the trx_data
2946
structure, if it didn't exist before, so we save the position
2947
into an auto variable and then write it into the transaction
2948
data for the binary log (i.e., trx_data).
2951
binlog_trans_log_savepos(this, &pos);
2952
trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2953
trx_data->before_stmt_pos= pos;
2958
Write a table map to the binary log.
2961
/*
  Write a Table_map_log_event for `table` to the binary log.

  Preconditions (asserted): row-based logging is active for the current
  statement, mysql_bin_log is open and the table has a valid table_map_id.
  For a transactional write with no table map logged yet
  (binlog_table_maps == 0), binlog_start_trans_and_stmt() is called first.
  After the event is written binlog_table_maps is incremented and the
  table share records the current table_map_version of the log.
*/
int THD::binlog_write_table_map(TABLE *table, bool is_trans)
2965
/* Pre-conditions */
2966
assert(current_stmt_binlog_row_based && mysql_bin_log.is_open());
2967
assert(table->s->table_map_id != ULONG_MAX);
2969
Table_map_log_event::flag_set const
2970
flags= Table_map_log_event::TM_NO_FLAGS;
2973
the_event(this, table, table->s->table_map_id, is_trans, flags);
2975
if (is_trans && binlog_table_maps == 0)
2976
binlog_start_trans_and_stmt();
2978
if ((error= mysql_bin_log.write(&the_event)))
2981
binlog_table_maps++;
2982
table->s->table_map_version= mysql_bin_log.table_map_version();
2987
/*
  Return the pending rows event stored in the binlog trx_data of this THD,
  or NULL when no trx_data is attached for binlog_hton.
*/
THD::binlog_get_pending_rows_event() const
2989
binlog_trx_data *const trx_data=
2990
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2992
This is less than ideal, but here's the story: If there is no
2993
trx_data, prepare_pending_rows_event() has never been called
2994
(since the trx_data is set up there). In that case, we just return
2997
return trx_data ? trx_data->pending() : NULL;
3001
/*
  Store ev as the pending rows event of this THD.

  The binlog trx_data is created on demand through binlog_setup_trx_data()
  when none is attached yet.

  NOTE(review): the result of binlog_setup_trx_data() is not checked in the
  lines shown here, although that function can return 1 on failure -
  confirm trx_data cannot be NULL when set_pending() is reached.
*/
THD::binlog_set_pending_rows_event(Rows_log_event* ev)
3003
if (thd_get_ha_data(this, binlog_hton) == NULL)
3004
binlog_setup_trx_data();
3006
binlog_trx_data *const trx_data=
3007
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3010
trx_data->set_pending(ev);
3015
Moves the last bunch of rows from the pending Rows event to the binlog
3016
(either cached binlog if transaction, or disk binlog). Sets a new pending
3020
/*
  Write the pending Rows_log_event of thd (if any) and install `event` as
  the new pending event.

  The pending event goes to trx_data->trans_log when it asks for statement
  caching or when that cache is already non-empty, otherwise to log_file.
  The write is done under LOCK_log; m_table_map_version is stepped when
  the pending event carries STMT_END_F. A write to log_file is followed by
  flush_and_sync() and by rotate_and_purge() with LOCK_log already held.
*/
MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
3021
Rows_log_event* event)
3023
assert(mysql_bin_log.is_open());
3027
binlog_trx_data *const trx_data=
3028
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
3032
if (Rows_log_event* pending= trx_data->pending())
3034
IO_CACHE *file= &log_file;
3037
Decide if we should write to the log file directly or to the
3040
if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log))
3041
file= &trx_data->trans_log;
3044
If we are writing to the log file directly, we could avoid
3045
locking the log. This does not work since we need to step the
3046
m_table_map_version below, and that change has to be protected
3047
by the LOCK_log mutex.
3049
pthread_mutex_lock(&LOCK_log);
3052
Write pending event to log file or transaction cache
3054
if (pending->write(file))
3056
pthread_mutex_unlock(&LOCK_log);
3061
We step the table map version if we are writing an event
3062
representing the end of a statement. We do this regardless of
3063
wheather we write to the transaction cache or to directly to the
3066
In an ideal world, we could avoid stepping the table map version
3067
if we were writing to a transaction cache, since we could then
3068
reuse the table map that was written earlier in the transaction
3069
cache. This does not work since STMT_END_F implies closing all
3070
table mappings on the slave side.
3072
TODO: Find a solution so that table maps does not have to be
3073
written several times within a transaction.
3075
if (pending->get_flags(Rows_log_event::STMT_END_F))
3076
++m_table_map_version;
3080
if (file == &log_file)
3082
error= flush_and_sync();
3086
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3090
pthread_mutex_unlock(&LOCK_log);
3093
thd->binlog_set_pending_rows_event(event);
3099
Write an event to the binary log.
3102
/*
  Write a single event to the binary log or to the per-connection
  transaction cache.

  Visible flow: while thd->binlog_evt_union.do_union is set, the function
  records in binlog_evt_union that an event was seen. Otherwise pending
  row events are flushed (without ending the statement) before LOCK_log is
  taken. Under LOCK_log, and only if the log is open, the event is
  filtered on OPTION_BIN_LOG and binlog_filter->db_ok(), the target
  (log_file or trx_data->trans_log) is chosen, context events (Intvar,
  Rand, User_var) are built when the statement is not logged row-based,
  and event_info itself is written. A write that goes to log_file is
  followed by flush_and_sync() and rotate_and_purge(). An EFBIG error is
  reported as ER_TRANS_CACHE_FULL, other write errors as
  ER_ERROR_ON_WRITE. m_table_map_version is stepped when the event carries
  LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F.

  NOTE(review): thd is dereferenced at the top of the function, yet later
  conditions test thd for NULL - confirm whether event_info->thd may be
  NULL here.
*/
bool MYSQL_BIN_LOG::write(Log_event *event_info)
3104
THD *thd= event_info->thd;
3107
if (thd->binlog_evt_union.do_union)
3110
In Stored function; Remember that function call caused an update.
3111
We will log the function call to the binary log on function exit
3113
thd->binlog_evt_union.unioned_events= true;
3114
thd->binlog_evt_union.unioned_events_trans |= event_info->cache_stmt;
3119
Flush the pending rows event to the transaction cache or to the
3120
log file. Since this function potentially aquire the LOCK_log
3121
mutex, we do this before aquiring the LOCK_log mutex in this
3124
We only end the statement if we are in a top-level statement. If
3125
we are inside a stored function, we do not end the statement since
3126
this will close all tables on the slave.
3128
bool const end_stmt= false;
3129
thd->binlog_flush_pending_rows_event(end_stmt);
3131
pthread_mutex_lock(&LOCK_log);
3134
In most cases this is only called if 'is_open()' is true; in fact this is
3135
mostly called if is_open() *was* true a few instructions before, but it
3136
could have changed since.
3138
if (likely(is_open()))
3140
IO_CACHE *file= &log_file;
3142
In the future we need to add to the following if tests like
3143
"do the involved tables match (to be implemented)
3144
binlog_[wild_]{do|ignore}_table?" (WL#1049)"
3146
const char *local_db= event_info->get_db();
3147
if ((thd && !(thd->options & OPTION_BIN_LOG)) ||
3148
(!binlog_filter->db_ok(local_db)))
3150
VOID(pthread_mutex_unlock(&LOCK_log));
3155
Should we write to the binlog cache or to the binlog on disk?
3156
Write to the binlog cache if:
3157
- it is already not empty (meaning we're in a transaction; note that the
3158
present event could be about a non-transactional table, but still we need
3159
to write to the binlog cache in that case to handle updates to mixed
3160
trans/non-trans table types the best possible in binlogging)
3161
- or if the event asks for it (cache_stmt == TRUE).
3163
if (opt_using_transactions && thd)
3165
if (thd->binlog_setup_trx_data())
3168
binlog_trx_data *const trx_data=
3169
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
3170
IO_CACHE *trans_log= &trx_data->trans_log;
3171
my_off_t trans_log_pos= my_b_tell(trans_log);
3172
if (event_info->get_cache_stmt() || trans_log_pos != 0)
3174
if (trans_log_pos == 0)
3175
thd->binlog_start_trans_and_stmt();
3179
TODO as Mats suggested, for all the cases above where we write to
3180
trans_log, it sounds unnecessary to lock LOCK_log. We should rather
3181
test first if we want to write to trans_log, and if not, lock
3187
No check for auto events flag here - this write method should
3188
never be called if auto-events are enabled
3192
1. Write first log events which describe the 'run environment'
3197
If row-based binlogging, Insert_id, Rand and other kind of "setting
3198
context" events are not needed.
3202
if (!thd->current_stmt_binlog_row_based)
3204
if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
3206
Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
3207
thd->first_successful_insert_id_in_prev_stmt_for_binlog);
3211
if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
3214
If the auto_increment was second in a table's index (possible with
3215
MyISAM or BDB) (table->next_number_keypart != 0), such event is
3216
in fact not necessary. We could avoid logging it.
3218
Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
3219
thd->auto_inc_intervals_in_cur_stmt_for_binlog.
3226
Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
3230
if (thd->user_var_events.elements)
3232
for (uint i= 0; i < thd->user_var_events.elements; i++)
3234
BINLOG_USER_VAR_EVENT *user_var_event;
3235
get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);
3236
User_var_log_event e(thd, user_var_event->user_var_event->name.str,
3237
user_var_event->user_var_event->name.length,
3238
user_var_event->value,
3239
user_var_event->length,
3240
user_var_event->type,
3241
user_var_event->charset_number);
3250
Write the SQL command
3253
if (event_info->write(file))
3256
if (file == &log_file) // we are writing to the real log (disk)
3258
if (flush_and_sync())
3261
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3268
if (my_errno == EFBIG)
3269
my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(0));
3271
my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
3276
if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
3277
++m_table_map_version;
3279
pthread_mutex_unlock(&LOCK_log);
3284
/*
  Free-function wrapper: forwards level, format and the argument list to
  logger.error_log_print() and returns its result.
*/
int error_log_print(enum loglevel level, const char *format,
3287
return logger.error_log_print(level, format, args);
3291
/*
  Free-function wrapper: forwards the query text, its length and
  current_utime to logger.slow_log_print() and returns its result.
*/
bool slow_log_print(THD *thd, const char *query, uint query_length,
3292
uint64_t current_utime)
3294
return logger.slow_log_print(thd, query, query_length, current_utime);
3298
/*
  Decide whether a server command should go to the general log.

  The visible tests require a non-empty general_log_handler_list and the
  bit for `command` to be set in what_to_log; OPTION_LOG_OFF in
  thd->options is consulted afterwards. The values returned from each
  branch are not shown in this excerpt.
*/
bool LOGGER::log_command(THD *thd, enum enum_server_command command)
3301
Log command if we have at least one log event handler enabled and want
3302
to log this king of commands
3304
if (*general_log_handler_list && (what_to_log & (1L << (uint) command)))
3306
if (thd->options & OPTION_LOG_OFF)
3319
/*
  printf-style entry point for the general log.

  logger.log_command() is asked first whether this command is to be
  logged; the variadic arguments are then started after `format` and
  passed on to logger.general_log_print().
*/
bool general_log_print(THD *thd, enum enum_server_command command,
3320
const char *format, ...)
3325
/* Print the message to the buffer if we want to log this kind of commands */
3326
if (! logger.log_command(thd, command))
3329
va_start(args, format);
3330
error= logger.general_log_print(thd, command, format, args);
3336
/*
  Write a query string to the general log.

  When logger.log_command() accepts the command, the call is forwarded to
  logger.general_log_write() and its result is returned.
*/
bool general_log_write(THD *thd, enum enum_server_command command,
3337
const char *query, uint query_length)
3339
/* Write the message to the log if we want to log this kind of commands */
3340
if (logger.log_command(thd, command))
3341
return logger.general_log_write(thd, command, query, query_length);
3346
/*
  Rotate the log when needed and, with HAVE_REPLICATION, purge old logs.

  LOCK_log is taken and released here unless
  RP_LOCK_LOG_IS_ALREADY_LOCKED is set in flags. Rotation
  (new_file_without_locking) happens when RP_FORCE_ROTATE is set or when
  my_b_tell(&log_file) has reached max_size. When expire_logs_days is
  non-zero, the cut-off time is now minus that many days and
  purge_logs_before_date() is called if the cut-off is not negative.
*/
void MYSQL_BIN_LOG::rotate_and_purge(uint flags)
3348
if (!(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED))
3349
pthread_mutex_lock(&LOCK_log);
3350
if ((flags & RP_FORCE_ROTATE) ||
3351
(my_b_tell(&log_file) >= (my_off_t) max_size))
3353
new_file_without_locking();
3354
#ifdef HAVE_REPLICATION
3355
if (expire_logs_days)
3357
time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
3358
if (purge_time >= 0)
3359
purge_logs_before_date(purge_time);
3363
if (!(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED))
3364
pthread_mutex_unlock(&LOCK_log);
3367
/*
  Return a file id as uint. The visible body brackets its work with
  LOCK_log; the statement that produces the id is not shown in this
  excerpt.
*/
uint MYSQL_BIN_LOG::next_file_id()
3370
pthread_mutex_lock(&LOCK_log);
3372
pthread_mutex_unlock(&LOCK_log);
3378
Write the contents of a cache to the binary log.
3382
cache Cache to write to the binary log
3383
lock_log True if the LOCK_log mutex should be acquired, false otherwise
3384
sync_log True if the log should be flushed and sync:ed
3387
Write the contents of the cache to the binary log. The cache will
3388
be reset as a READ_CACHE to be able to read the contents from it.
3391
/*
  Copy the contents of an IO_CACHE into log_file, patching event headers
  on the way.

  LOCK_log is handed to a Mutex_sentry only when lock_log is true. The
  cache is reinitialised as a READ_CACHE and consumed segment by segment
  through my_b_fill(). For every event header found in a segment the
  end_log_pos field (at LOG_POS_OFFSET) is increased by `group`, the
  position of log_file when the copy starts, and the event length (at
  EVENT_LEN_OFFSET) gives the offset of the next header. A header split
  across two segments is kept in `header`, with `carry` holding the number
  of bytes already collected.

  Returns ER_ERROR_ON_WRITE when reinit_io_cache() or my_b_write() fails.

  NOTE(review): group is my_b_tell(&log_file) cast to uint and the patched
  value is stored with int4store(); positions beyond 4GB would presumably
  wrap - confirm.
*/
int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
3393
Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
3395
if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
3396
return ER_ERROR_ON_WRITE;
3397
uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
3399
uchar header[LOG_EVENT_HEADER_LEN];
3402
The events in the buffer have incorrect end_log_pos data
3403
(relative to beginning of group rather than absolute),
3404
so we'll recalculate them in situ so the binlog is always
3405
correct, even in the middle of a group. This is possible
3406
because we now know the start position of the group (the
3407
offset of this cache in the log, if you will); all we need
3408
to do is to find all event-headers, and add the position of
3409
the group to the end_log_pos of each event. This is pretty
3410
straight forward, except that we read the cache in segments,
3411
so an event-header might end up on the cache-border and get
3415
group= (uint)my_b_tell(&log_file);
3422
if we only got a partial header in the last iteration,
3423
get the other half now and process a full header.
3425
if (unlikely(carry > 0))
3427
assert(carry < LOG_EVENT_HEADER_LEN);
3429
/* assemble both halves */
3430
memcpy(&header[carry], (char *)cache->read_pos, LOG_EVENT_HEADER_LEN - carry);
3432
/* fix end_log_pos */
3433
val= uint4korr(&header[LOG_POS_OFFSET]) + group;
3434
int4store(&header[LOG_POS_OFFSET], val);
3436
/* write the first half of the split header */
3437
if (my_b_write(&log_file, header, carry))
3438
return ER_ERROR_ON_WRITE;
3441
copy fixed second half of header to cache so the correct
3442
version will be written later.
3444
memcpy((char *)cache->read_pos, &header[carry], LOG_EVENT_HEADER_LEN - carry);
3446
/* next event header at ... */
3447
hdr_offs = uint4korr(&header[EVENT_LEN_OFFSET]) - carry;
3452
/* if there is anything to write, process it. */
3454
if (likely(length > 0))
3457
process all event-headers in this (partial) cache.
3458
if next header is beyond current read-buffer,
3459
we'll get it later (though not necessarily in the
3460
very next iteration, just "eventually").
3463
while (hdr_offs < length)
3466
partial header only? save what we can get, process once
3470
if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
3472
carry= length - hdr_offs;
3473
memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
3478
/* we've got a full event-header, and it came in one piece */
3480
uchar *log_pos= (uchar *)cache->read_pos + hdr_offs + LOG_POS_OFFSET;
3482
/* fix end_log_pos */
3483
val= uint4korr(log_pos) + group;
3484
int4store(log_pos, val);
3486
/* next event header at ... */
3487
log_pos= (uchar *)cache->read_pos + hdr_offs + EVENT_LEN_OFFSET;
3488
hdr_offs += uint4korr(log_pos);
3494
Adjust hdr_offs. Note that it may still point beyond the segment
3495
read in the next iteration; if the current event is very long,
3496
it may take a couple of read-iterations (and subsequent adjustments
3497
of hdr_offs) for it to point into the then-current segment.
3498
If we have a split header (!carry), hdr_offs will be set at the
3499
beginning of the next iteration, overwriting the value we set here:
3504
/* Write data to the binary log file */
3505
if (my_b_write(&log_file, cache->read_pos, length))
3506
return ER_ERROR_ON_WRITE;
3507
cache->read_pos=cache->read_end; // Mark buffer used up
3508
} while ((length= my_b_fill(cache)));
3519
Write a cached log entry to the binary log.
3520
- To support transaction over replication, we wrap the transaction
3521
with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
3522
We want to write a BEGIN/ROLLBACK block when a non-transactional table
3523
was updated in a transaction which was rolled back. This is to ensure
3524
that the same updates are run on the slave.
3527
@param cache The cache to copy to the binlog
3528
@param commit_event The commit event to print after writing the
3529
contents of the cache.
3532
We only come here if there is something in the cache.
3534
The thing in the cache is always a complete transaction.
3536
'cache' needs to be reinitialized after this function returns.
3539
/*
  Write a cached transaction to the binary log under LOCK_log.

  When the log is open and the cache is non-empty (my_b_tell(cache) > 0),
  a BEGIN Query_log_event with error_code forced to 0 is written first,
  then the cache through write_cache(cache, false, false), then
  commit_event, followed by flush_and_sync(). A read error on the cache is
  reported with ER_ERROR_ON_READ and sets write_error. For an XID_EVENT
  commit the code takes LOCK_prep_xids; per the in-body comment the binlog
  is otherwise rotated if necessary through rotate_and_purge() with
  LOCK_log already held. The error path reports ER_ERROR_ON_WRITE before
  unlocking.

  NOTE(review): commit_event is asserted non-NULL, yet later conditions
  still test it for NULL; the tests look redundant - confirm.
*/
bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
3541
VOID(pthread_mutex_lock(&LOCK_log));
3543
/* NULL would represent nothing to replicate after ROLLBACK */
3544
assert(commit_event != NULL);
3547
if (likely(is_open())) // Should always be true
3550
We only bother to write to the binary log if there is anything
3553
if (my_b_tell(cache) > 0)
3556
Log "BEGIN" at the beginning of every transaction. Here, a
3557
transaction is either a BEGIN..COMMIT block or a single
3558
statement in autocommit mode.
3560
Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), true, false);
3562
Imagine this is rollback due to net timeout, after all
3563
statements of the transaction succeeded. Then we want a
3564
zero-error code in BEGIN. In other words, if there was a
3565
really serious error code it's already in the statement's
3566
events, there is no need to put it also in this internally
3567
generated event, and as this event is generated late it would
3568
lead to false alarms.
3570
This is safer than thd->clear_error() against kills at shutdown.
3572
qinfo.error_code= 0;
3574
Now this Query_log_event has artificial log_pos 0. It must be
3575
adjusted to reflect the real position in the log. Not doing it
3576
would confuse the slave: it would prevent this one from
3577
knowing where he is in the master's binlog, which would result
3578
in wrong positions being shown to the user, MASTER_POS_WAIT
3581
if (qinfo.write(&log_file))
3584
if ((write_error= write_cache(cache, false, false)))
3587
if (commit_event && commit_event->write(&log_file))
3589
if (flush_and_sync())
3591
if (cache->error) // Error on read
3593
sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
3594
write_error=1; // Don't give more errors
3601
if commit_event is Xid_log_event, increase the number of
3602
prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated
3603
if there're prepared xids in it - see the comment in new_file() for
3605
If the commit_event is not Xid_log_event (then it's a Query_log_event)
3606
rotate binlog, if necessary.
3608
if (commit_event && commit_event->get_type_code() == XID_EVENT)
3610
pthread_mutex_lock(&LOCK_prep_xids);
3612
pthread_mutex_unlock(&LOCK_prep_xids);
3615
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3617
VOID(pthread_mutex_unlock(&LOCK_log));
3625
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
3627
VOID(pthread_mutex_unlock(&LOCK_log));
3633
Wait until we get a signal that the relay log has been updated
3635
@param[in] thd a THD struct
3637
LOCK_log must be taken before calling this function.
3638
It will be released at the end of the function.
3641
/*
  Block on update_cond until the relay log is signalled as updated.

  thd->enter_cond() registers update_cond / LOCK_log together with the
  state message, pthread_cond_wait() performs one wait, and
  thd->exit_cond() restores the previous message. The wait is performed
  once; no predicate loop is visible here.
*/
void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
3643
const char *old_msg;
3644
old_msg= thd->enter_cond(&update_cond, &LOCK_log,
3645
"Slave has read all relay log; "
3646
"waiting for the slave I/O "
3647
"thread to update it" );
3648
pthread_cond_wait(&update_cond, &LOCK_log);
3649
thd->exit_cond(old_msg);
3655
Wait until we get a signal that the binary log has been updated.
3656
Applies to master only.
3659
@param[in] thd a THD struct
3660
@param[in] timeout a pointer to a timespec;
3661
NULL means to wait w/o timeout.
3662
@retval 0 if got signalled on update
3663
@retval non-0 if wait timeout elapsed
3665
LOCK_log must be taken before calling this function.
3666
LOCK_log is being released while the thread is waiting.
3667
LOCK_log is released by the caller.
3670
/*
  Wait on update_cond (with LOCK_log) for the binary log to be updated.

  Both an untimed pthread_cond_wait() and a pthread_cond_timedwait() on
  `timeout` appear below; the condition choosing between them is not shown
  in this excerpt - presumably the timed wait is used when timeout is
  non-NULL. The result of the timed wait is kept in ret.

  NOTE(review): old_msg is initialised from thd->proc_info and then
  immediately overwritten by the value returned from enter_cond().
*/
int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
3671
const struct timespec *timeout)
3674
const char* old_msg = thd->proc_info;
3675
old_msg= thd->enter_cond(&update_cond, &LOCK_log,
3676
"Master has sent all binlog to slave; "
3677
"waiting for binlog to be updated");
3679
pthread_cond_wait(&update_cond, &LOCK_log);
3681
ret= pthread_cond_timedwait(&update_cond, &LOCK_log,
3682
const_cast<struct timespec *>(timeout));
3690
@param exiting Bitmask for one or more of the following bits:
3691
- LOG_CLOSE_INDEX : if we should close the index file
3692
- LOG_CLOSE_TO_BE_OPENED : if we intend to call open
3693
at once after close.
3694
- LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
3697
One can do an open on the object at once after doing a close.
3698
The internal structures are not freed until cleanup() is called
3701
/*
  Close the binary log and, optionally, its index file.

  When log_state is LOG_OPENED: with HAVE_REPLICATION a stop event is
  accounted for in bytes_written (LOG_BIN, auto events enabled and
  LOG_CLOSE_STOP_EVENT set); for a WRITE_CACHE binary log one zero byte is
  written with pwrite() at BIN_LOG_HEADER_SIZE + FLAGS_OFFSET to clear
  LOG_EVENT_BINLOG_IN_USE_F; then MYSQL_LOG::close() is called. The index
  file is closed when LOG_CLOSE_INDEX is set and its cache is initialised.
  log_state becomes LOG_TO_BE_OPENED if LOG_CLOSE_TO_BE_OPENED is set and
  LOG_CLOSED otherwise.

  NOTE(review): the pwrite() result is ignored - confirm that is intended.
*/
void MYSQL_BIN_LOG::close(uint exiting)
3702
{ // One can't set log_type here!
3703
if (log_state == LOG_OPENED)
3705
#ifdef HAVE_REPLICATION
3706
if (log_type == LOG_BIN && !no_auto_events &&
3707
(exiting & LOG_CLOSE_STOP_EVENT))
3711
bytes_written+= s.data_written;
3714
#endif /* HAVE_REPLICATION */
3716
/* don't pwrite in a file opened with O_APPEND - it doesn't work */
3717
if (log_file.type == WRITE_CACHE && log_type == LOG_BIN)
3719
my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
3720
uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
3721
pwrite(log_file.file, &flags, 1, offset);
3724
/* this will cleanup IO_CACHE, sync and close the file */
3725
MYSQL_LOG::close(exiting);
3729
The following test is needed even if is_open() is not set, as we may have
3730
called a not complete close earlier and the index file is still open.
3733
if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
3735
end_io_cache(&index_file);
3736
if (my_close(index_file.file, MYF(0)) < 0 && ! write_error)
3739
sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name, errno);
3742
log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
3748
/*
  Set max_size to max_size_arg while holding LOCK_log, so the assignment
  cannot interleave with a rotation that reads the old value.
*/
void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
3751
We need to take locks, otherwise this may happen:
3752
new_file() is called, calls open(old_max_size), then before open() starts,
3753
set_max_size() sets max_size to max_size_arg, then open() starts and
3754
uses the old_max_size argument, so max_size_arg has been overwritten and
3755
it's like if the SET command was never run.
3757
pthread_mutex_lock(&LOCK_log);
3759
max_size= max_size_arg;
3760
pthread_mutex_unlock(&LOCK_log);
3766
Check if a string is a valid number.
3768
@param str String to test
3769
@param res Store value here
3770
@param allow_wildcards Set to 1 if we should ignore '%' and '_'
3773
For the moment the allow_wildcards argument is not used
3774
Should be moved to some other file.
3777
1 String is a number
3782
/*
  Scan str as a number: leading spaces are skipped, one optional sign
  character is tested, then characters accepted by my_isdigit() for
  files_charset_info are consumed; wild_many and wild_one are accepted as
  well when allow_wildcards is true. The visible rejection test is on
  trailing characters (*str != 0) or flag == 0; 1 is returned when the
  number is ok.

  NOTE(review): the `register` storage class on str is no longer valid in
  C++17 - confirm the language level this file is built with.
*/
static bool test_if_number(register const char *str,
3783
long *res, bool allow_wildcards)
3790
while (*str++ == ' ') ;
3791
if (*--str == '-' || *str == '+')
3793
while (my_isdigit(files_charset_info,*str) ||
3794
(allow_wildcards && (*str == wild_many || *str == wild_one)))
3802
my_isdigit(files_charset_info,*str) ||
3803
(allow_wildcards && (*str == wild_many || *str == wild_one)) ;
3806
if (*str != 0 || flag == 0)
3810
return(1); /* Number ok */
3811
} /* test_if_number */
3814
/*
  Report `message` followed by the text for the current errno through
  sql_print_error(). The strerror() form shown here is compiled only when
  HAVE_STRERROR is defined.
*/
void sql_perror(const char *message)
3816
#ifdef HAVE_STRERROR
3817
sql_print_error("%s: %s",message, strerror(errno));
3824
/*
  Rotate the error log file while holding LOCK_error_log.

  Visible steps: build the name log_error_file + "-old" in err_renamed and
  err_renamed + "-tmp" in err_temp; redirect stdout/stderr to the
  temporary file with freopen(); delete any previous "-old" file and
  rename the current log to it; reopen stdout/stderr on log_error_file;
  copy the content of the temporary file to stderr in IO_SIZE chunks and
  delete the temporary file.

  NOTE(review): strmake() is limited to FN_REFLEN-4 characters and "-old"
  (four characters plus terminator) is then appended into
  err_renamed[FN_REFLEN]. If strmake() can return a pointer at offset
  FN_REFLEN-4, the terminator lands one byte past the buffer - confirm
  against the strmake() contract.
  NOTE(review): the results of the stderr freopen() calls and of
  my_rename() are not checked - confirm that is intended.
*/
bool flush_error_log()
3829
char err_renamed[FN_REFLEN], *end;
3830
end= strmake(err_renamed,log_error_file,FN_REFLEN-4);
3831
strmov(end, "-old");
3832
VOID(pthread_mutex_lock(&LOCK_error_log));
3833
char err_temp[FN_REFLEN+4];
3835
On Windows is necessary a temporary file for to rename
3836
the current error file.
3838
strxmov(err_temp, err_renamed,"-tmp",NullS);
3839
(void) my_delete(err_temp, MYF(0));
3840
if (freopen(err_temp,"a+",stdout))
3846
freopen(err_temp,"a+",stderr);
3847
(void) my_delete(err_renamed, MYF(0));
3848
my_rename(log_error_file,err_renamed,MYF(0));
3849
if (freopen(log_error_file,"a+",stdout))
3850
freopen(log_error_file,"a+",stderr);
3852
if ((fd = my_open(err_temp, O_RDONLY, MYF(0))) >= 0)
3854
while ((bytes= my_read(fd, buf, IO_SIZE, MYF(0))) &&
3855
bytes != MY_FILE_ERROR)
3856
my_fwrite(stderr, buf, bytes, MYF(0));
3857
my_close(fd, MYF(0));
3859
(void) my_delete(err_temp, MYF(0));
3863
VOID(pthread_mutex_unlock(&LOCK_error_log));
3868
/*
  Wake every thread waiting on update_cond (broadcast, not a single
  signal).
*/
void MYSQL_BIN_LOG::signal_update()
3870
pthread_cond_broadcast(&update_cond);
3875
Prints a printf style message to the error log and, under NT, to the
3878
This function prints the message into a buffer and then sends that buffer
3879
to other functions to write that message to other logging sources.
3881
@param event_type Type of event to write (Error, Warning, or Info)
3882
@param format Printf style format of message
3883
@param args va_list list of arguments for the message
3886
The function always returns 0. The return value is present in the
3887
signature to be compatible with other logging routines, which could
3888
return an error (e.g. logging to the log tables)
3890
/*
  Write one formatted line to stderr while holding LOCK_error_log.

  The line starts with a local-time stamp produced via localtime_r()
  (two-digit year first), followed by a level tag - ERROR for ERROR_LEVEL,
  Warning for WARNING_LEVEL, Note otherwise - and the message text.
  error_code and buffer_length are marked unused.
*/
static void print_buffer_to_file(enum loglevel level,
3891
int error_code __attribute__((unused)),
3893
size_t buffer_length __attribute__((unused)))
3899
VOID(pthread_mutex_lock(&LOCK_error_log));
3902
localtime_r(&skr, &tm_tmp);
3905
fprintf(stderr, "%02d%02d%02d %2d:%02d:%02d [%s] %s\n",
3906
start->tm_year % 100,
3912
(level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
3913
"Warning" : "Note"),
3918
VOID(pthread_mutex_unlock(&LOCK_error_log));
3923
/*
  Format a message with vsnprintf() into a local buffer and hand it to
  print_buffer_to_file().

  errno is captured into error_code before formatting, so the value passed
  on is the one present at entry. vsnprintf() is bounded by sizeof(buff).
*/
int vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
3927
int error_code= errno;
3929
length= vsnprintf(buff, sizeof(buff), format, args);
3931
print_buffer_to_file(level, error_code, buff, length);
3937
/*
  printf-style helper: forwards the variadic arguments to
  error_log_print() with ERROR_LEVEL.
*/
void sql_print_error(const char *format, ...)
3941
va_start(args, format);
3942
error_log_print(ERROR_LEVEL, format, args);
3949
/*
  printf-style helper: forwards the variadic arguments to
  error_log_print() with WARNING_LEVEL.
*/
void sql_print_warning(const char *format, ...)
3953
va_start(args, format);
3954
error_log_print(WARNING_LEVEL, format, args);
3961
/*
  printf-style helper: forwards the variadic arguments to
  error_log_print() with INFORMATION_LEVEL.
*/
void sql_print_information(const char *format, ...)
3965
va_start(args, format);
3966
error_log_print(INFORMATION_LEVEL, format, args);
3973
/********* transaction coordinator log for 2pc - mmap() based solution *******/
3976
the log consists of a file, mmapped to a memory.
3977
file is divided into pages of tc_log_page_size size.
3978
(usable size of the first page is smaller because of log header)
3979
there's PAGE control structure for each page
3980
each page (or rather PAGE control structure) can be in one of three
3981
states - active, syncing, pool.
3982
there could be only one page in active or syncing states,
3983
but many in pool - pool is fifo queue.
3984
usual lifecycle of a page is pool->active->syncing->pool
3985
"active" page - is a page where new xid's are logged.
3986
the page stays active as long as syncing slot is taken.
3987
"syncing" page is being synced to disk. no new xid can be added to it.
3988
when the sync is done the page is moved to a pool and an active page
3991
the result of such an architecture is a natural "commit grouping" -
3992
If commits are coming faster than the system can sync, they do not
3993
stall. Instead, all commits that came since the last sync are
3994
logged to the same page, and they all are synced with the next -
3995
one - sync. Thus, though individual commits are delayed, throughput
3998
when a xid is added to an active page, the thread of this xid waits
3999
for a page's condition until the page is synced. when syncing slot
4000
becomes vacant one of these waiters is awakened to take care of syncing.
4001
it syncs the page and signals all waiters that the page is synced.
4002
PAGE::waiters is used to count these waiters, and a page may never
4003
become active again until waiters==0 (that is all waiters from the
4004
previous sync have noticed the sync was completed)
4006
note, that the page becomes "dirty" and has to be synced only when a
4007
new xid is added into it. Removing a xid from a page does not make it
4008
dirty - we don't sync removals to disk.
4011
// Counter incremented by the binlog rotation code just before it waits on
// COND_prep_xids for prepared xids to drain.
ulong tc_log_page_waits= 0;
4015
// Size of the log header: the magic bytes plus one extra byte.
// TC_LOG_MMAP::open() stores total_ha_2pc in the byte right after the magic.
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)
4017
// Magic bytes copied to the very start of the mmapped log.
static const char tc_log_magic[]={(char) 254, 0x23, 0x05, 0x74};
4019
// Length used with ftruncate() when TC_LOG_MMAP::open() creates a new log.
ulong opt_tc_log_size= TC_LOG_MIN_SIZE;
4020
// tc_log_page_size is set from my_getpagesize() in TC_LOG_MMAP::open();
// the two page-usage counters are not updated in this part of the file.
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
4022
// Opens (or creates) the memory-mapped transaction coordinator log named by
// opt_name under mysql_data_home, maps it, sets up one PAGE control
// structure per page and writes the log header.
//
// An already existing file is reported as "Recovering after a crash" and its
// length is taken from the file; recover() is run when `crashed` is set.
// A missing file (ENOENT) is created and sized to opt_tc_log_size.
//
// @param opt_name  log file name; must be non-empty (asserted)
//
// NOTE(review): the return statements and error-path jumps are not visible
// in this excerpt - presumably 0 on success and non-zero on failure; confirm.
int TC_LOG_MMAP::open(const char *opt_name)
4025
bool crashed= false;
4028
assert(total_ha_2pc > 1);
4029
assert(opt_name && opt_name[0]);
4031
tc_log_page_size= my_getpagesize();
4032
// log pages are handled in units of the OS page size
assert(TC_LOG_PAGE_SIZE % tc_log_page_size == 0);
4034
fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
4035
if ((fd= my_open(logname, O_RDWR, MYF(0))) < 0)
4037
// anything other than "file does not exist" is a real error
if (my_errno != ENOENT)
4039
if (using_heuristic_recover())
4041
if ((fd= my_create(logname, CREATE_MODE, O_RDWR, MYF(MY_WME))) < 0)
4044
file_length= opt_tc_log_size;
4045
// extend the freshly created file to its full size before mapping it
if (ftruncate(fd, file_length))
4052
sql_print_information("Recovering after a crash using %s", opt_name);
4053
if (tc_heuristic_recover)
4055
sql_print_error("Cannot perform automatic crash recovery when "
4056
"--tc-heuristic-recover is used");
4059
file_length= my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
4060
// an existing log must consist of whole pages
if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
4064
data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
4065
MAP_NOSYNC|MAP_SHARED, fd, 0);
4066
if (data == MAP_FAILED)
4073
// divide first, then narrow: casting file_length to uint before the
// division would truncate the length of large log files
npages=(uint)(file_length/tc_log_page_size);
4074
assert(npages >= 3); // to guarantee non-empty pool
4075
if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
4078
for (pg=pages, i=0; i < npages; i++, pg++)
4083
pthread_mutex_init(&pg->lock, MY_MUTEX_INIT_FAST);
4084
pthread_cond_init (&pg->cond, 0);
4085
pg->start=(my_xid *)(data + i*tc_log_page_size);
4087
// start is a my_xid pointer, so the end of the page is start plus the
// number of my_xid slots in a page, not plus the page size in bytes
pg->end=pg->start + tc_log_page_size/sizeof(my_xid);
4088
pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
4090
// the first page also holds the log header, so it has fewer usable slots;
// its slots are counted back from the end of the page
pages[0].size=pages[0].free=
4091
(tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
4092
pages[0].start=pages[0].end-pages[0].size;
4093
pages[npages-1].next=0;
4096
if (crashed && recover())
4099
// (re)write the header: magic signature followed by the 2pc engine count
memcpy(data, tc_log_magic, sizeof(tc_log_magic));
4100
data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
4101
msync(data, tc_log_page_size, MS_SYNC);
4102
my_sync(fd, MYF(0));
4105
pthread_mutex_init(&LOCK_sync, MY_MUTEX_INIT_FAST);
4106
pthread_mutex_init(&LOCK_active, MY_MUTEX_INIT_FAST);
4107
pthread_mutex_init(&LOCK_pool, MY_MUTEX_INIT_FAST);
4108
pthread_cond_init(&COND_active, 0);
4109
pthread_cond_init(&COND_pool, 0);
4116
pool_last=pages+npages-1;
4126
there is no active page, let's get one from the pool.
4128
Two strategies here:
4129
-# take the first from the pool
4130
-# if there're waiters - take the one with the most free space.
4133
TODO page merging. try to allocate adjacent page first,
4134
so that they can be flushed both in one sync
4137
// Chooses a page from the pool to become the active page, holding LOCK_pool
// from start to finish. The first page examined is taken if it has no
// waiters; otherwise the following pool pages are scanned for the
// waiter-free page with the most free slots. While nothing usable is found,
// overflow() is called (it waits on COND_pool) and the search is repeated.
// NOTE(review): the statements that record the chosen page and assign
// `active` are not visible in this excerpt - confirm against the full file.
void TC_LOG_MMAP::get_active_from_pool()
4139
PAGE **p, **best_p=0;
4143
pthread_mutex_lock(&LOCK_pool);
4148
if ((*p)->waiters == 0) // can the first page be used ?
4149
break; // yes - take it.
4151
best_free=0; // no - trying second strategy
4152
for (p=&(*p)->next; *p; p=&(*p)->next)
4154
if ((*p)->waiters == 0 && (*p)->free > best_free)
4156
best_free=(*p)->free;
4161
// retry after overflow() returns; overflow() always returns 1
while ((*best_p == 0 || best_free == 0) && overflow());
4164
// statistics: a completely empty page is now in use
if (active->free == active->size) // we've chosen an empty page
4166
tc_log_cur_pages_used++;
4167
set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
4170
if ((*best_p)->next) // unlink the page from the pool
4171
*best_p=(*best_p)->next;
4176
pthread_mutex_unlock(&LOCK_pool);
4181
perhaps, increase log size ?
4183
// Called when no usable page is available: counts the wait in
// tc_log_page_waits and blocks on COND_pool. The wait uses LOCK_pool, which
// get_active_from_pool() holds when it calls this function.
// @return always 1
int TC_LOG_MMAP::overflow()
4186
simple overflow handling - just wait
4187
TODO perhaps, increase log size ?
4188
let's check the behaviour of tc_log_page_waits first
4190
tc_log_page_waits++;
4191
pthread_cond_wait(&COND_pool, &LOCK_pool);
4192
return 1; // always return 1
4196
Record that transaction XID is committed on the persistent storage.
4198
This function is called in the middle of two-phase commit:
4199
First all resources prepare the transaction, then tc_log->log() is called,
4200
then all resources commit the transaction, then tc_log->unlog() is called.
4202
All access to active page is serialized but it's not a problem, as
4203
we're assuming that fsync() will be a main bottleneck.
4204
That is, parallelizing writes to log pages we'll decrease number of
4205
threads waiting for a page, but then all these threads will be waiting
4206
for a fsync() anyway
4208
If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and
4209
records XID in a special Xid_log_event.
4210
If tc_log == TC_LOG_MMAP then xid is written in a special memory-mapped
4216
\# - otherwise, "cookie", a number that will be passed as an argument
4217
to unlog() call. tc_log can define it any way it wants,
4218
and use for whatever purposes. TC_LOG_MMAP sets it
4219
to the position in memory where xid was logged to.
4222
// Stores xid in a free slot of the active page and returns once the page
// has been synced, either by another thread or by this one taking the
// syncing slot itself.
//
// @param thd  unused
// @param xid  transaction id to record
// @return 0 on error, otherwise the cookie: the byte offset of the slot
//         from the start of the mapping (`data`)
//
// NOTE(review): cookie is a ulong but the function returns int - confirm
// offsets cannot exceed the int range.
// NOTE(review): several short statements (local declarations, the slot
// search loop, waiter counting, the sync call and the `done` label) are not
// visible in this excerpt.
int TC_LOG_MMAP::log_xid(THD *thd __attribute__((unused)), my_xid xid)
4228
pthread_mutex_lock(&LOCK_active);
4231
if active page is full - just wait...
4232
frankly speaking, active->free here accessed outside of mutex
4233
protection, but it's safe, because it only means we may miss an
4234
unlog() for the active page, and we're not waiting for it here -
4235
unlog() does not signal COND_active.
4237
while (unlikely(active && active->free == 0))
4238
pthread_cond_wait(&COND_active, &LOCK_active);
4240
/* no active page ? take one from the pool */
4242
get_active_from_pool();
4245
pthread_mutex_lock(&p->lock);
4247
/* searching for an empty slot */
4251
assert(p->ptr < p->end); // because p->free > 0
4254
/* found! store xid there and mark the page dirty */
4255
cookie= (ulong)((uchar *)p->ptr - data); // can never be zero
4260
/* to sync or not to sync - this is the question */
4261
// LOCK_sync is taken before the page lock is released
pthread_mutex_unlock(&LOCK_active);
4262
pthread_mutex_lock(&LOCK_sync);
4263
pthread_mutex_unlock(&p->lock);
4266
{ // somebody's syncing. let's wait
4269
note - it must be while (), not do ... while () here
4270
as p->state may be not DIRTY when we come here
4272
while (p->state == DIRTY && syncing)
4273
pthread_cond_wait(&p->cond, &LOCK_sync);
4275
err= p->state == ERROR;
4276
if (p->state != DIRTY) // page was synced
4278
if (p->waiters == 0)
4279
pthread_cond_signal(&COND_pool); // in case somebody's waiting
4280
pthread_mutex_unlock(&LOCK_sync);
4281
goto done; // we're done
4283
} // page was not synced! do it now
4284
assert(active == p && syncing == 0);
4285
// move the page from the active slot to the syncing slot
pthread_mutex_lock(&LOCK_active);
4286
syncing=p; // place is vacant - take it
4287
active=0; // page is not active anymore
4288
pthread_cond_broadcast(&COND_active); // in case somebody's waiting
4289
pthread_mutex_unlock(&LOCK_active);
4290
pthread_mutex_unlock(&LOCK_sync);
4294
return err ? 0 : cookie;
4297
// Flushes the page in the `syncing` slot to disk, appends it to the tail of
// the pool, wakes the threads waiting on that page and on COND_pool, and
// then, under LOCK_sync, signals the active page's condition so that a new
// syncer can start.
// NOTE(review): the return statement is not visible in this excerpt.
int TC_LOG_MMAP::sync()
4301
assert(syncing != active);
4304
sit down and relax - this can take a while...
4305
note - no locks are held at this point
4307
// NOTE(review): as visible here the my_sync() result overwrites the msync()
// result - confirm an msync() failure is not lost.
err= msync(syncing->start, 1, MS_SYNC);
4309
err= my_sync(fd, MYF(0));
4311
/* page is synced. let's move it to the pool */
4312
pthread_mutex_lock(&LOCK_pool);
4313
pool_last->next=syncing;
4316
// waiters read the page state to learn whether the sync failed
syncing->state= err ? ERROR : POOL;
4317
pthread_cond_broadcast(&syncing->cond); // signal "sync done"
4318
pthread_cond_signal(&COND_pool); // in case somebody's waiting
4319
pthread_mutex_unlock(&LOCK_pool);
4321
/* marking 'syncing' slot free */
4322
pthread_mutex_lock(&LOCK_sync);
4324
// NOTE(review): log_xid() sets active to 0 when it takes the syncing slot;
// confirm active cannot still be 0 when it is dereferenced here.
pthread_cond_signal(&active->cond); // wake up a new syncer
4325
pthread_mutex_unlock(&LOCK_sync);
4330
erase xid from the page, update page free space counters/pointers.
4331
cookie points directly to the memory where xid was logged.
4334
// Releases the slot identified by cookie. The page is found from
// cookie/tc_log_page_size and the slot from data+cookie; the page's `ptr`
// is lowered to the released slot if that slot lies before it.
//
// @param cookie  byte offset of the slot, as produced by log_xid()
// @param xid     unused
//
// NOTE(review): the statements that clear the slot and update p->free are
// not visible in this excerpt.
void TC_LOG_MMAP::unlog(ulong cookie, my_xid xid __attribute__((unused)))
4336
PAGE *p=pages+(cookie/tc_log_page_size);
4337
my_xid *x=(my_xid *)(data+cookie);
4340
assert(x >= p->start && x < p->end);
4343
pthread_mutex_lock(&p->lock);
4345
assert(p->free <= p->size);
4346
set_if_smaller(p->ptr, x);
4347
if (p->free == p->size) // the page is completely empty
4348
statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
4349
if (p->waiters == 0) // the page is in pool and ready to rock
4350
pthread_cond_signal(&COND_pool); // ping ... for overflow()
4351
pthread_mutex_unlock(&p->lock);
4354
// Tears down what open() set up: destroys the log-wide mutexes and
// COND_pool, garbles the signature byte, destroys each page's mutex and
// condition, frees the PAGE array, unmaps the file, closes it and, when
// inited>=5, deletes the log file.
// NOTE(review): the switch/case labels that select how much to undo are not
// visible in this excerpt.
// NOTE(review): open() initialises COND_active, but no matching
// pthread_cond_destroy(&COND_active) is visible here - confirm.
void TC_LOG_MMAP::close()
4359
pthread_mutex_destroy(&LOCK_sync);
4360
pthread_mutex_destroy(&LOCK_active);
4361
pthread_mutex_destroy(&LOCK_pool);
4362
pthread_cond_destroy(&COND_pool);
4364
data[0]='A'; // garble the first (signature) byte, in case my_delete fails
4366
for (i=0; i < npages; i++)
4368
// the PAGE array is allocated zero-filled in open()
if (pages[i].ptr == 0)
4370
pthread_mutex_destroy(&pages[i].lock);
4371
pthread_cond_destroy(&pages[i].cond);
4374
my_free((uchar*)pages, MYF(0));
4376
my_munmap((char*)data, (size_t)file_length);
4378
my_close(fd, MYF(0));
4380
if (inited>=5) // cannot do in the switch because of Windows
4381
my_delete(logname, MYF(MY_WME));
4385
// Crash recovery for the memory-mapped log: validates the header, collects
// every non-zero xid found in any page into the `xids` hash, passes the
// hash to ha_recover() and finally zeroes the whole mapping.
// NOTE(review): the return statements and error-path jumps are not visible
// in this excerpt; the final message is presumably the common error exit.
int TC_LOG_MMAP::recover()
4388
PAGE *p=pages, *end_p=pages+npages;
4390
// the log must start with the magic signature written by open()
if (memcmp(data, tc_log_magic, sizeof(tc_log_magic)))
4392
sql_print_error("Bad magic header in tc log");
4397
the first byte after magic signature is set to current
4398
number of storage engines on startup
4400
if (data[sizeof(tc_log_magic)] != total_ha_2pc)
4402
sql_print_error("Recovery failed! You must enable "
4403
"exactly %d storage engines that support "
4404
"two-phase commit protocol",
4405
data[sizeof(tc_log_magic)]);
4409
if (hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
4410
sizeof(my_xid), 0, 0, MYF(0)))
4413
// a zero slot is treated as empty and skipped
for ( ; p < end_p ; p++)
4415
for (my_xid *x=p->start; x < p->end; x++)
4416
if (*x && my_hash_insert(&xids, (uchar *)x))
4420
if (ha_recover(&xids))
4424
memset(data, 0, (size_t)file_length);
4430
sql_print_error("Crash recovery failed. Either correct the problem "
4431
"(if it's, for example, out of memory error) and restart, "
4432
"or delete tc log and start mysqld with "
4433
"--tc-heuristic-recover={commit|rollback}");
4439
// Global transaction coordinator log instances: the dummy implementation
// and the memory-mapped implementation.
TC_LOG_DUMMY tc_log_dummy;
4440
TC_LOG_MMAP tc_log_mmap;
4443
Perform heuristic recovery, if --tc-heuristic-recover was used.
4446
no matter whether heuristic recovery was successful or not
4447
mysqld must exit. So, return value is the same in both cases.
4450
0 no heuristic recovery was requested
4452
1 heuristic recovery was performed
4455
// Handles --tc-heuristic-recover: does nothing special when
// tc_heuristic_recover is not set, otherwise logs the heuristic recovery
// messages below.
// NOTE(review): the return statements and the actual recovery call are not
// visible in this excerpt; the surrounding comment documents 0 for "not
// requested" and 1 for "performed".
int TC_LOG::using_heuristic_recover()
4457
if (!tc_heuristic_recover)
4460
sql_print_information("Heuristic crash recovery mode");
4462
sql_print_error("Heuristic crash recovery failed");
4463
sql_print_information("Please restart mysqld without --tc-heuristic-recover");
4467
/****** transaction coordinator log for 2pc - binlog() based solution ******/
4468
// The binlog-based coordinator methods below are written as TC_LOG_BINLOG::
// but are members of MYSQL_BIN_LOG.
#define TC_LOG_BINLOG MYSQL_BIN_LOG
4472
keep in-memory list of prepared transactions
4473
(add to list in log(), remove on unlog())
4474
and copy it to the new binlog if rotated
4475
but let's check the behaviour of tc_log_page_waits first!
4478
// Opens the binlog as transaction coordinator log. Initialises
// LOCK_prep_xids/COND_prep_xids, walks the index to the last binlog name,
// opens that file and, if its first event is a format description event
// with LOG_EVENT_BINLOG_IN_USE_F set, runs recover() on it.
//
// @param opt_name  binlog name; must be non-empty (asserted)
//
// NOTE(review): the return statements, error-path jumps and several local
// declarations are not visible in this excerpt.
int TC_LOG_BINLOG::open(const char *opt_name)
4483
assert(total_ha_2pc > 1);
4484
assert(opt_name && opt_name[0]);
4486
pthread_mutex_init(&LOCK_prep_xids, MY_MUTEX_INIT_FAST);
4487
pthread_cond_init (&COND_prep_xids, 0);
4489
if (!my_b_inited(&index_file))
4491
/* There was a failure to open the index file, can't open the binlog */
4496
if (using_heuristic_recover())
4498
/* generate a new binlog to mask a corrupted one */
4499
open(opt_name, LOG_BIN, 0, WRITE_CACHE, 0, max_binlog_size, 0);
4504
if ((error= find_log_pos(&log_info, NullS, 1)))
4506
// LOG_INFO_EOF is not reported as an error here
if (error != LOG_INFO_EOF)
4507
sql_print_error("find_log_pos() failed (error: %d)", error);
4518
Format_description_log_event fdle(BINLOG_VERSION);
4519
char log_name[FN_REFLEN];
4521
if (! fdle.is_valid())
4526
// remember each index entry in turn; the last one copied is opened below
strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
4527
} while (!(error= find_next_log(&log_info, 1)));
4529
// NOTE(review): this error comes from find_next_log(), but the message
// below names find_log_pos() - confirm the wording is intended.
if (error != LOG_INFO_EOF)
4531
sql_print_error("find_log_pos() failed (error: %d)", error);
4535
if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
4537
sql_print_error("%s", errmsg);
4541
if ((ev= Log_event::read_log_event(&log, 0, &fdle)) &&
4542
ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
4543
ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
4545
sql_print_information("Recovering after a crash using %s", opt_name);
4546
error= recover(&log, (Format_description_log_event *)ev);
4553
my_close(file, MYF(MY_WME));
4563
/** This is called on shutdown, after ha_panic. */
4564
// Destroys the mutex and condition created in open(); asserts that no
// prepared xids are outstanding.
void TC_LOG_BINLOG::close()
4566
assert(prepared_xids==0);
4567
pthread_mutex_destroy(&LOCK_prep_xids);
4568
pthread_cond_destroy (&COND_prep_xids);
4580
// Ends the thread's binlog transaction with an Xid_log_event built from xid.
//
// @param thd  session whose binlog transaction data is looked up
// @param xid  transaction id placed in the Xid_log_event
// @return the logical negation of binlog_end_trans()'s result
int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
4582
Xid_log_event xle(thd, xid);
4583
binlog_trx_data *trx_data=
4584
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
4586
We always commit the entire transaction when writing an XID. Also
4587
note that the return value is inverted.
4589
return(!binlog_end_trans(thd, trx_data, &xle, true));
4592
// Decrements prepared_xids under LOCK_prep_xids, signalling COND_prep_xids
// when the count reaches zero, then calls rotate_and_purge(0).
// Both parameters are unused.
void TC_LOG_BINLOG::unlog(ulong cookie __attribute__((unused)),
4593
my_xid xid __attribute__((unused)))
4595
pthread_mutex_lock(&LOCK_prep_xids);
4596
assert(prepared_xids > 0);
4597
if (--prepared_xids == 0) {
4598
pthread_cond_signal(&COND_prep_xids);
4600
pthread_mutex_unlock(&LOCK_prep_xids);
4601
rotate_and_purge(0); // as ::write() did not rotate
4604
// Crash recovery from a binlog: reads events from log, copies the xid of
// every XID_EVENT into mem_root and inserts it into the `xids` hash, then
// passes the hash to ha_recover().
//
// @param log   IO cache of the binlog to scan
// @param fdle  format description event used to read further events; its
//              LOG_EVENT_BINLOG_IN_USE_F flag is cleared here
//
// NOTE(review): the return statements, error-path jumps and some short
// statements are not visible in this excerpt; the final message is
// presumably the common error exit.
int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
4610
if (! fdle->is_valid() ||
4611
hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
4612
sizeof(my_xid), 0, 0, MYF(0)))
4615
init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE);
4617
fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
4619
while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid())
4621
if (ev->get_type_code() == XID_EVENT)
4623
// the xid is copied into mem_root and the copy is what goes into the hash
Xid_log_event *xev=(Xid_log_event *)ev;
4624
uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
4628
my_hash_insert(&xids, x);
4633
if (ha_recover(&xids))
4636
free_root(&mem_root, MYF(0));
4641
free_root(&mem_root, MYF(0));
4644
sql_print_error("Crash recovery failed. Either correct the problem "
4645
"(if it's, for example, out of memory error) and restart, "
4646
"or delete (or rename) binary log and start mysqld with "
4647
"--tc-heuristic-recover={commit|rollback}");
4652
#ifdef INNODB_COMPATIBILITY_HOOKS
4654
Get the file name of the MySQL binlog.
4655
@return the name of the binlog file
4658
// Returns mysql_bin_log.get_log_fname(), the file name of the global binlog.
const char* mysql_bin_log_file_name(void)
4660
return mysql_bin_log.get_log_fname();
4663
Get the current position of the MySQL binlog.
4664
@return byte offset from the beginning of the binlog
4667
// Returns the pos_in_file member of the global binlog's log file,
// converted to uint64_t.
uint64_t mysql_bin_log_file_pos(void)
4669
return (uint64_t) mysql_bin_log.get_log_file()->pos_in_file;
4671
#endif /* INNODB_COMPATIBILITY_HOOKS */
4674
// Plugin descriptor registering the binlog as a storage-engine type plugin;
// binlog_init is its init hook and the remaining hooks are NULL.
// NOTE(review): some descriptor fields are not visible in this excerpt.
mysql_declare_plugin(binlog)
4676
MYSQL_STORAGE_ENGINE_PLUGIN,
4680
"This is a pseudo storage engine to represent the binlog in a transaction",
4682
binlog_init, /* Plugin Init */
4683
NULL, /* Plugin Deinit */
4684
NULL, /* status variables */
4685
NULL, /* system variables */
4686
NULL /* config options */
4688
mysql_declare_plugin_end;