1
/* Copyright (C) 2000-2003 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
24
Abort logging when we get an error in reading or writing log files
27
#include "mysql_priv.h"
29
#include "rpl_filter.h"
34
#include <m_ctype.h> // For test_if_number
36
#include <drizzle/plugin.h>
38
/* max size of the log message */
/* Sizes the stack buffer message_buff in LOGGER::general_log_print. */
39
#define MAX_LOG_BUFFER_SIZE 1024
40
/* Size of the stack buffers that receive the user/host string built with
   strxnmov in LOGGER::slow_log_print and LOGGER::general_log_write. */
#define MAX_USER_HOST_SIZE 512
41
/* Size of the buffer holding the formatted timestamp in
   MYSQL_QUERY_LOG::write. */
#define MAX_TIME_SIZE 32
42
/* All-bits-set my_off_t value; binlog_trx_data stores it in before_stmt_pos
   as the marker for an undefined position. */
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
44
/* Expands to the stringized flag name F followed by one space when
   (V)&(F) is non-zero, and to an empty string otherwise. */
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
48
MYSQL_BIN_LOG mysql_bin_log;
49
ulong sync_binlog_counter= 0;
51
static bool test_if_number(const char *str,
52
long *res, bool allow_wildcards);
53
static int binlog_init(void *p);
54
static int binlog_close_connection(handlerton *hton, THD *thd);
55
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
56
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
57
static int binlog_commit(handlerton *hton, THD *thd, bool all);
58
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
59
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
62
sql_print_message_func sql_print_message_handlers[3] =
64
sql_print_information,
70
/*
  Build a default log file name in buff.

  The name starts as a copy of pidfile_name (strmake is given a limit of
  FN_REFLEN-5 characters) and is then passed through fn_format together with
  mysql_data_home and log_ext, using MY_UNPACK_FILENAME|MY_REPLACE_EXT.

  buff     caller-supplied output buffer; also used as the fn_format input
  log_ext  extension handed to fn_format

  Returns whatever fn_format returns.

  NOTE(review): the -5 presumably reserves room for the extension -- confirm.
*/
char *make_default_log_name(char *buff,const char* log_ext)
72
strmake(buff, pidfile_name, FN_REFLEN-5);
73
return fn_format(buff, buff, mysql_data_home, log_ext,
74
MYF(MY_UNPACK_FILENAME|MY_REPLACE_EXT));
78
Helper class to hold a mutex for the duration of the
81
Eliminates the need for explicit unlocking of mutexes on, e.g.,
82
error returns. On passing a null pointer, the sentry will not do
88
Mutex_sentry(pthread_mutex_t *mutex)
92
pthread_mutex_lock(mutex);
98
pthread_mutex_unlock(m_mutex);
103
pthread_mutex_t *m_mutex;
105
// It's not allowed to copy this object in any way
106
Mutex_sentry(Mutex_sentry const&);
107
void operator=(Mutex_sentry const&);
111
Helper class to store binary log transaction data.
113
class binlog_trx_data {
116
: at_least_one_stmt(0), m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF)
118
trans_log.end_of_file= max_binlog_cache_size;
123
assert(pending() == NULL);
124
close_cached_file(&trans_log);
127
my_off_t position() const {
128
return my_b_tell(&trans_log);
133
return pending() == NULL && my_b_tell(&trans_log) == 0;
137
Truncate the transaction cache to a certain position. This
138
includes deleting the pending event.
140
void truncate(my_off_t pos)
144
reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0);
145
if (pos < before_stmt_pos)
146
before_stmt_pos= MY_OFF_T_UNDEF;
149
The only valid positions that can be truncated to are at the
150
beginning of a statement. We are relying on this fact to be able
151
to set the at_least_one_stmt flag correctly. In other words, if
152
we are truncating to the beginning of the transaction cache,
153
there will be no statements in the cache; otherwise, we will
154
have at least one statement in the transaction cache.
156
at_least_one_stmt= (pos > 0);
160
Reset the entire contents of the transaction cache, emptying it
166
before_stmt_pos= MY_OFF_T_UNDEF;
167
trans_log.end_of_file= max_binlog_cache_size;
170
Rows_log_event *pending() const
175
void set_pending(Rows_log_event *const pending)
180
IO_CACHE trans_log; // The transaction cache
183
Boolean that is true if there is at least one statement in the
186
bool at_least_one_stmt;
190
Pending binrows event. This event is the event where the rows are
193
Rows_log_event *m_pending;
197
Binlog position before the start of the current statement.
199
my_off_t before_stmt_pos;
202
handlerton *binlog_hton;
205
/* Check if a given table is opened log table */
206
int check_if_log_table(uint db_len __attribute__((__unused__)),
207
const char *db __attribute__((__unused__)),
208
uint table_name_len __attribute__((__unused__)),
209
const char *table_name __attribute__((__unused__)),
210
uint check_if_opened __attribute__((__unused__)))
215
/* log event handlers */
217
bool Log_to_file_event_handler::
218
log_error(enum loglevel level, const char *format,
221
return vprint_msg_to_log(level, format, args);
224
void Log_to_file_event_handler::init_pthread_objects()
226
mysql_log.init_pthread_objects();
227
mysql_slow_log.init_pthread_objects();
231
/** Wrapper around MYSQL_LOG::write() for slow log. */
233
bool Log_to_file_event_handler::
234
log_slow(THD *thd, time_t current_time, time_t query_start_arg,
235
const char *user_host, uint user_host_len,
236
uint64_t query_utime, uint64_t lock_utime, bool is_command,
237
const char *sql_text, uint sql_text_len)
239
return mysql_slow_log.write(thd, current_time, query_start_arg,
240
user_host, user_host_len,
241
query_utime, lock_utime, is_command,
242
sql_text, sql_text_len);
247
Wrapper around MYSQL_LOG::write() for general log. We need it since we
248
want all log event handlers to have the same signature.
251
bool Log_to_file_event_handler::
252
log_general(THD *thd __attribute__((__unused__)),
253
time_t event_time, const char *user_host,
254
uint user_host_len, int thread_id,
255
const char *command_type, uint command_type_len,
256
const char *sql_text, uint sql_text_len,
257
CHARSET_INFO *client_cs __attribute__((__unused__)))
259
return mysql_log.write(event_time, user_host, user_host_len,
260
thread_id, command_type, command_type_len,
261
sql_text, sql_text_len);
265
bool Log_to_file_event_handler::init()
270
mysql_slow_log.open_slow_log(sys_var_slow_log_path.value);
273
mysql_log.open_query_log(sys_var_general_log_path.value);
275
is_initialized= true;
282
void Log_to_file_event_handler::cleanup()
285
mysql_slow_log.cleanup();
288
void Log_to_file_event_handler::flush()
290
/* reopen log files */
292
mysql_log.reopen_file();
294
mysql_slow_log.reopen_file();
298
Log error with all enabled log event handlers
303
level The level of the error significance: NOTE,
305
format format string for the error message
306
args list of arguments for the format string
313
/*
  Pass one error message to every handler in error_log_handler_list.
  The accumulated error flag becomes true as soon as any handler call
  returns true and then stays true.
*/
bool LOGGER::error_log_print(enum loglevel level, const char *format,
317
Log_event_handler **current_handler;
319
/* currently we don't need locking here as there is no error_log table */
320
/* The list is scanned until its null entry. */
for (current_handler= error_log_handler_list ; *current_handler ;)
321
/* The handler call is the left operand of ||, so it always runs even
   when error is already set. */
error= (*current_handler++)->log_error(level, format, args) || error;
327
/*
  First cleanup stage: destroys the LOCK_logger rwlock and, when a file log
  handler exists, calls its cleanup() method. The handler object itself is
  not deleted here.
*/
void LOGGER::cleanup_base()
330
rwlock_destroy(&LOCK_logger);
331
if (file_log_handler)
332
file_log_handler->cleanup();
336
void LOGGER::cleanup_end()
339
if (file_log_handler)
340
delete file_log_handler;
345
Perform basic log initialization: create file-based log handler and
348
/*
  Basic logger setup: allocates the file log handler when none exists yet,
  selects the file-based error log (LOG_FILE), then initializes the pthread
  objects of the handler and the LOCK_logger rwlock.
*/
void LOGGER::init_base()
354
Here we create file log handler. We don't do it for the table log handler
355
here as it cannot be created so early. The reason is THD initialization,
356
which depends on the system variables (parsed later).
358
if (!file_log_handler)
359
file_log_handler= new Log_to_file_event_handler;
361
/* by default we use traditional error log */
362
init_error_log(LOG_FILE);
364
file_log_handler->init_pthread_objects();
365
my_rwlock_init(&LOCK_logger, NULL);
369
bool LOGGER::flush_logs(THD *thd __attribute__((__unused__)))
374
Now we lock logger, as nobody should be able to use logging routines while
375
log tables are closed
377
logger.lock_exclusive();
379
/* reopen log files */
380
file_log_handler->flush();
382
/* end of log flush */
389
Log slow query with all enabled log event handlers
394
thd THD of the query being logged
395
query The query being logged
396
query_length The length of the query string
397
current_utime Current time in microseconds (from undefined start)
404
/*
  Send one slow-query record to every handler in slow_log_handler_list.

  thd            connection whose statement is being logged
  query          statement text
  query_length   length of query
  current_utime  current time in microseconds

  The accumulated error flag is true when at least one handler call
  returned true.
*/
bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
405
uint64_t current_utime)
409
Log_event_handler **current_handler;
410
bool is_command= false;
411
char user_host_buff[MAX_USER_HOST_SIZE];
412
Security_context *sctx= thd->security_ctx;
413
uint user_host_len= 0;
414
uint64_t query_utime, lock_utime;
417
Print the message to the buffer if we have slow log enabled
420
/* True only when the handler list has a non-null first entry. */
if (*slow_log_handler_list)
424
/* do not log slow queries from replication threads */
425
if (thd->slave_thread && !opt_log_slow_slave_statements)
435
/* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
436
/* Null security-context fields are replaced by empty strings. */
user_host_len= (strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
437
sctx->priv_user ? sctx->priv_user : "", "[",
438
sctx->user ? sctx->user : "", "] @ ",
439
sctx->host ? sctx->host : "", " [",
440
sctx->ip ? sctx->ip : "", "]", NullS) -
443
current_time= my_time_possible_from_micro(current_utime);
444
/* Both durations are measured from thd->start_utime. The assignment of 0
   further down is presumably the fallback for an unset start_utime
   -- confirm. */
if (thd->start_utime)
446
query_utime= (current_utime - thd->start_utime);
447
lock_utime= (thd->utime_after_lock - thd->start_utime);
451
query_utime= lock_utime= 0;
457
/* The logged text is replaced by the name of the server command.
   NOTE(review): presumably done only when no query text was supplied
   -- confirm. */
query= command_name[thd->command].str;
458
query_length= command_name[thd->command].length;
461
/* Every handler is called; the call is the left operand of ||. */
for (current_handler= slow_log_handler_list; *current_handler ;)
462
error= (*current_handler++)->log_slow(thd, current_time, thd->start_time,
463
user_host_buff, user_host_len,
464
query_utime, lock_utime, is_command,
465
query, query_length) || error;
472
/*
  Send one general-log record to every handler in general_log_handler_list.

  thd           connection issuing the command
  command       server command; its name is looked up in command_name
  query         text to log
  query_length  length of query

  The accumulated error flag is true when at least one handler call
  returned true.
*/
bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
473
const char *query, uint query_length)
476
Log_event_handler **current_handler= general_log_handler_list;
477
char user_host_buff[MAX_USER_HOST_SIZE];
478
Security_context *sctx= thd->security_ctx;
480
uint user_host_len= 0;
484
id= thd->thread_id; /* Normal thread */
486
id= 0; /* Log from connect handler */
494
/* Null security-context fields are replaced by empty strings. */
user_host_len= strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
495
sctx->priv_user ? sctx->priv_user : "", "[",
496
sctx->user ? sctx->user : "", "] @ ",
497
sctx->host ? sctx->host : "", " [",
498
sctx->ip ? sctx->ip : "", "]", NullS) -
501
/* One timestamp is taken here and handed to every handler. */
current_time= my_time(0);
503
/* NOTE(review): the trailing || error is redundant next to |=. */
while (*current_handler)
504
error|= (*current_handler++)->
505
log_general(thd, current_time, user_host_buff,
507
command_name[(uint) command].str,
508
command_name[(uint) command].length,
510
thd->variables.character_set_client) || error;
516
/*
  Format a message into a stack buffer of MAX_LOG_BUFFER_SIZE bytes and
  pass the buffer and its length to general_log_write.

  thd      connection issuing the command
  command  server command passed through to general_log_write
  format   printf-style format string
  args     arguments for format

  Returns the result of general_log_write.
*/
bool LOGGER::general_log_print(THD *thd, enum enum_server_command command,
517
const char *format, va_list args)
519
uint message_buff_len= 0;
520
char message_buff[MAX_LOG_BUFFER_SIZE];
522
/* prepare message */
524
/* NOTE(review): vsnprintf returns the length the complete output would
   need, which can be larger than sizeof(message_buff) when the output is
   truncated, and negative on an encoding error. The value is stored in an
   unsigned variable and passed on unclamped as the message length
   -- confirm and clamp if needed. */
message_buff_len= vsnprintf(message_buff, sizeof(message_buff),
527
/* Produces an empty message; message_buff_len keeps its initial 0.
   Presumably the branch for a missing format -- confirm. */
message_buff[0]= '\0';
529
return general_log_write(thd, command, message_buff, message_buff_len);
532
void LOGGER::init_error_log(uint error_log_printer)
534
if (error_log_printer & LOG_NONE)
536
error_log_handler_list[0]= 0;
540
switch (error_log_printer) {
542
error_log_handler_list[0]= file_log_handler;
543
error_log_handler_list[1]= 0;
545
/* these two are disabled for now */
549
case LOG_TABLE|LOG_FILE:
555
void LOGGER::init_slow_log(uint slow_log_printer)
557
if (slow_log_printer & LOG_NONE)
559
slow_log_handler_list[0]= 0;
563
slow_log_handler_list[0]= file_log_handler;
564
slow_log_handler_list[1]= 0;
567
void LOGGER::init_general_log(uint general_log_printer)
569
if (general_log_printer & LOG_NONE)
571
general_log_handler_list[0]= 0;
575
general_log_handler_list[0]= file_log_handler;
576
general_log_handler_list[1]= 0;
580
bool LOGGER::activate_log_handler(THD* thd __attribute__((__unused__)),
583
MYSQL_QUERY_LOG *file_log;
590
file_log= file_log_handler->get_mysql_slow_log();
592
file_log->open_slow_log(sys_var_slow_log_path.value);
593
init_slow_log(log_output_options);
597
case QUERY_LOG_GENERAL:
600
file_log= file_log_handler->get_mysql_log();
602
file_log->open_query_log(sys_var_general_log_path.value);
603
init_general_log(log_output_options);
615
void LOGGER::deactivate_log_handler(THD *thd __attribute__((__unused__)),
623
tmp_opt= &opt_slow_log;
624
file_log= file_log_handler->get_mysql_slow_log();
626
case QUERY_LOG_GENERAL:
628
file_log= file_log_handler->get_mysql_log();
631
assert(0); // Impossible
643
/*
  Configure the three handler lists from the given printer bitmasks by
  delegating to init_error_log, init_slow_log and init_general_log.

  error_log_printer    must be below LOG_TABLE (asserted)
  slow_log_printer     bitmask for the slow log
  general_log_printer  bitmask for the general log
*/
int LOGGER::set_handlers(uint error_log_printer,
644
uint slow_log_printer,
645
uint general_log_printer)
647
/* error log table is not supported yet */
648
assert(error_log_printer < LOG_TABLE);
652
init_error_log(error_log_printer);
653
init_slow_log(slow_log_printer);
654
init_general_log(general_log_printer);
663
Save position of binary log transaction cache.
666
binlog_trans_log_savepos()
668
thd The thread to take the binlog data from
669
pos Pointer to variable where the position will be stored
673
Save the current position in the binary log transaction cache into
674
the variable pointed to by 'pos'
678
/*
  Store the current position of the transaction cache of thd in *pos.
  The per-connection binlog data is created first when it does not exist.
*/
binlog_trans_log_savepos(THD *thd, my_off_t *pos)
681
/* No binlog data attached to this connection yet: set it up. */
if (thd_get_ha_data(thd, binlog_hton) == NULL)
682
thd->binlog_setup_trx_data();
683
/* Fetched again after the optional setup above. */
binlog_trx_data *const trx_data=
684
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
685
assert(mysql_bin_log.is_open());
686
*pos= trx_data->position();
692
Truncate the binary log transaction cache.
695
binlog_trans_log_truncate()
697
thd The thread to take the binlog data from
698
pos Position to truncate to
702
Truncate the binary log to the given position. Will not change
707
/*
  Truncate the transaction cache of thd to position pos.
  Requires that binlog data is already attached to the connection and that
  pos is not the all-bits-set value (both asserted).
*/
binlog_trans_log_truncate(THD *thd, my_off_t pos)
709
assert(thd_get_ha_data(thd, binlog_hton) != NULL);
710
/* Only true if binlog_trans_log_savepos() wasn't called before */
711
/* Same bit pattern as MY_OFF_T_UNDEF. */
assert(pos != ~(my_off_t) 0);
713
binlog_trx_data *const trx_data=
714
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
715
trx_data->truncate(pos);
721
this function is mostly a placeholder.
722
conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
723
should be moved here.
726
/*
  Plugin init hook for the binlog handlerton.

  p  pointer to the handlerton; it is stored in the global binlog_hton and
     then filled in with the binlog callbacks defined in this file.
*/
int binlog_init(void *p)
728
binlog_hton= (handlerton *)p;
729
/* Reported state follows the opt_bin_log setting. */
binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
730
binlog_hton->db_type=DB_TYPE_BINLOG;
731
/* The savepoint callbacks below read and write one my_off_t through
   their sv argument. */
binlog_hton->savepoint_offset= sizeof(my_off_t);
732
binlog_hton->close_connection= binlog_close_connection;
733
binlog_hton->savepoint_set= binlog_savepoint_set;
734
binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
735
binlog_hton->commit= binlog_commit;
736
binlog_hton->rollback= binlog_rollback;
737
binlog_hton->prepare= binlog_prepare;
738
binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
742
/*
  close_connection callback: detaches and releases the binlog transaction
  data of the connection. The transaction cache must be empty (asserted).

  NOTE(review): trx_data is used without a NULL check; this assumes binlog
  data was attached to the connection -- confirm against callers.
*/
static int binlog_close_connection(handlerton *hton __attribute__((__unused__)),
745
binlog_trx_data *const trx_data=
746
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
747
assert(trx_data->empty());
748
/* Clear the slot before the object is destroyed. */
thd_set_ha_data(thd, binlog_hton, NULL);
749
/* Explicit destructor call followed by my_free: presumably the object was
   constructed in place in memory from the my_malloc family -- confirm. */
trx_data->~binlog_trx_data();
750
my_free((uchar*)trx_data, MYF(0));
760
thd The thread whose transaction should be ended
761
trx_data Pointer to the transaction data to use
762
end_ev The end event to use, or NULL
763
all True if the entire transaction should be ended, false if
764
only the statement transaction should be ended.
768
End the currently open transaction. The transaction can be either
769
a real transaction (if 'all' is true) or a statement transaction
772
If 'end_ev' is NULL, the transaction is a rollback of only
773
transactional tables, so the transaction cache will be truncated
774
to either just before the last opened statement transaction (if
775
'all' is false), or reset completely (if 'all' is true).
778
binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
779
Log_event *end_ev, bool all)
782
IO_CACHE *trans_log= &trx_data->trans_log;
785
NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of
786
only transactional tables. If the transaction contain changes to
787
any non-transactional tables, we need to write the transaction and log
793
Doing a commit or a rollback including non-transactional tables,
794
i.e., ending a transaction where we might write the transaction
795
cache to the binary log.
797
We can always end the statement when ending a transaction since
798
transactions are not allowed inside stored functions. If they
799
were, we would have to ensure that we're not ending a statement
800
inside a stored function.
802
thd->binlog_flush_pending_rows_event(true);
804
error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev);
808
We need to step the table map version after writing the
809
transaction cache to disk.
811
mysql_bin_log.update_table_map_version();
812
statistic_increment(binlog_cache_use, &LOCK_status);
813
if (trans_log->disk_writes != 0)
815
statistic_increment(binlog_cache_disk_use, &LOCK_status);
816
trans_log->disk_writes= 0;
822
If rolling back an entire transaction or a single statement not
823
inside a transaction, we reset the transaction cache.
825
If rolling back a statement in a transaction, we truncate the
826
transaction cache to remove the statement.
828
if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
832
assert(!thd->binlog_get_pending_rows_event());
833
thd->clear_binlog_table_maps();
836
trx_data->truncate(trx_data->before_stmt_pos);
839
We need to step the table map version on a rollback to ensure
840
that a new table map event is generated instead of the one that
841
was written to the thrown-away transaction cache.
843
mysql_bin_log.update_table_map_version();
849
static int binlog_prepare(handlerton *hton __attribute__((__unused__)),
850
THD *thd __attribute__((__unused__)),
851
bool all __attribute__((__unused__)))
855
just pretend we can do 2pc, so that MySQL won't
857
real work will be done in MYSQL_BIN_LOG::log_xid()
862
#define YESNO(X) ((X) ? "yes" : "no")
865
This function is called once after each statement.
867
It has the responsibility to flush the transaction cache to the
868
binlog file on commits.
870
@param hton The binlog handlerton.
871
@param thd The client thread that executes the transaction.
872
@param all This is @c true if this is a real transaction commit, and
875
@see handlerton::commit
877
static int binlog_commit(handlerton *hton __attribute__((__unused__)),
880
binlog_trx_data *const trx_data=
881
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
883
if (trx_data->empty())
885
// we're here because trans_log was flushed in MYSQL_BIN_LOG::log_xid()
891
Decision table for committing a transaction. The top part, the
892
*conditions* represent different cases that can occur, and the
893
bottom part, the *actions*, represent what should be done in that
896
Real transaction 'all' was true
898
Statement in cache There was at least one statement in the
901
In transaction We are inside a transaction
903
Stmt modified non-trans The statement being committed modified a
904
non-transactional table
906
All modified non-trans Some statement before this one in the
907
transaction modified a non-transactional
911
============================= = = = = = = = = = = = = = = = =
912
Real transaction N N N N N N N N N N N N N N N N
913
Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y
914
In transaction N N N N Y Y Y Y N N N N Y Y Y Y
915
Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y
916
All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y
918
Action: (C)ommit/(A)ccumulate C C - C A C - C - - - - A A - A
919
============================= = = = = = = = = = = = = = = = =
922
============================= = = = = = = = = = = = = = = = =
923
Real transaction Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y
924
Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y
925
In transaction N N N N Y Y Y Y N N N N Y Y Y Y
926
Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y
927
All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y
929
(C)ommit/(A)ccumulate/(-) - - - - C C - C - - - - C C - C
930
============================= = = = = = = = = = = = = = = = =
932
In other words, we commit the transaction if and only if either of
933
the following are true:
934
- We are not in a transaction and committing a statement
936
- We are in a transaction and one (or more) of the following are
939
- A full transaction is committed
943
- A non-transactional statement is committed and there is
946
Otherwise, we accumulate the statement
948
uint64_t const in_transaction=
949
thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);
950
if ((in_transaction && (all || (!trx_data->at_least_one_stmt && thd->transaction.stmt.modified_non_trans_table))) || (!in_transaction && !all))
952
Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), true, false);
953
qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE)
954
int error= binlog_end_trans(thd, trx_data, &qev, all);
961
This function is called when a transaction involving a transactional
962
table is rolled back.
964
It has the responsibility to flush the transaction cache to the
965
binlog file. However, if the transaction does not involve
966
non-transactional tables, nothing needs to be logged.
968
@param hton The binlog handlerton.
969
@param thd The client thread that executes the transaction.
970
@param all This is @c true if this is a real transaction rollback, and
973
@see handlerton::rollback
975
/*
  rollback callback of the binlog handlerton.

  Depending on the modified_non_trans_table flags and OPTION_KEEP_LOG, the
  transaction is ended either with a ROLLBACK query event or with a null
  end event; both paths go through binlog_end_trans.
*/
static int binlog_rollback(handlerton *hton __attribute__((__unused__)),
979
binlog_trx_data *const trx_data=
980
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
982
/* Special case: the transaction cache holds nothing. */
if (trx_data->empty()) {
987
/* The flag consulted is the transaction-level one when all is set and the
   statement-level one otherwise; OPTION_KEEP_LOG also selects this branch. */
if ((all && thd->transaction.all.modified_non_trans_table) ||
988
(!all && thd->transaction.stmt.modified_non_trans_table) ||
989
(thd->options & OPTION_KEEP_LOG))
992
We write the transaction cache with a rollback last if we have
993
modified any non-transactional table. We do this even if we are
994
committing a single statement that has modified a
995
non-transactional table since it can have modified a
996
transactional table in that statement as well, which needs to be
997
rolled back on the slave.
999
Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), true, false);
1000
qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE)
1001
error= binlog_end_trans(thd, trx_data, &qev, all);
1003
/* Reached when the flag relevant for the value of all is not set. */
else if ((all && !thd->transaction.all.modified_non_trans_table) ||
1004
(!all && !thd->transaction.stmt.modified_non_trans_table))
1007
If we have modified only transactional tables, we can truncate
1008
the transaction cache without writing anything to the binary
1011
/* Null end event: nothing is handed to binlog_end_trans to be written. */
error= binlog_end_trans(thd, trx_data, 0, all);
1018
How do we handle this (unlikely but legal) case:
1020
[transaction] + [update to non-trans table] + [rollback to savepoint] ?
1022
The problem occurs when a savepoint is before the update to the
1023
non-transactional table. Then when there's a rollback to the savepoint, if we
1024
simply truncate the binlog cache, we lose the part of the binlog cache where
1025
the update is. If we want to not lose it, we need to write the SAVEPOINT
1026
command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
1027
is easy: it's just a write at the end of the binlog cache, but the former
1028
should be *inserted* to the place where the user called SAVEPOINT. The
1029
solution is that when the user calls SAVEPOINT, we write it to the binlog
1030
cache (so no need to later insert it). As transactions are never intermixed
1031
in the binary log (i.e. they are serialized), we won't have conflicts with
1032
savepoint names when using mysqlbinlog or in the slave SQL thread.
1033
Then when ROLLBACK TO SAVEPOINT is called, if we updated some
1034
non-transactional table, we don't truncate the binlog cache but instead write
1035
ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
1036
will chop the SAVEPOINT command from the binlog cache, which is good as in
1037
that case there is no need to have it in the binlog).
1040
/*
  savepoint_set callback: records the current transaction cache position in
  the savepoint area sv (used as a my_off_t) and then logs the current
  statement text of thd through binlog_query.
*/
static int binlog_savepoint_set(handlerton *hton __attribute__((__unused__)),
1043
/* sv is treated as storage for one my_off_t. */
binlog_trans_log_savepos(thd, (my_off_t*) sv);
1044
/* Write it to the binary log */
1047
thd->binlog_query(THD::STMT_QUERY_TYPE,
1048
thd->query, thd->query_length, true, false);
1052
/*
  savepoint_rollback callback: either logs the current statement text or
  truncates the transaction cache back to the position saved in sv.
*/
static int binlog_savepoint_rollback(handlerton *hton __attribute__((__unused__)),
1056
Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
1057
non-transactional table. Otherwise, truncate the binlog cache starting
1058
from the SAVEPOINT command.
1060
/* Marked unlikely: a non-transactional change in the transaction, or
   OPTION_KEEP_LOG being set. */
if (unlikely(thd->transaction.all.modified_non_trans_table ||
1061
(thd->options & OPTION_KEEP_LOG)))
1064
thd->binlog_query(THD::STMT_QUERY_TYPE,
1065
thd->query, thd->query_length, true, false);
1068
/* sv holds the my_off_t position stored by binlog_savepoint_set. */
binlog_trans_log_truncate(thd, *(my_off_t*)sv);
1073
/*
  Verify that a binary log starts with BINLOG_MAGIC.

  log     cache positioned at offset 0 (asserted)
  errmsg  receives a static message describing the failure

  Reads sizeof(magic) bytes from log and compares them with BINLOG_MAGIC.
*/
int check_binlog_magic(IO_CACHE* log, const char** errmsg)
1076
assert(my_b_tell(log) == 0);
1078
/* A non-zero result from my_b_read is treated as a read failure. */
if (my_b_read(log, (uchar*) magic, sizeof(magic)))
1080
*errmsg = "I/O error reading the header from the binary log";
1081
sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
1085
/* Any difference from BINLOG_MAGIC is reported as a bad magic number. */
if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
1087
*errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
1094
/*
  Open a binary log file for reading and attach a read cache to it.

  log            cache to initialize (READ_CACHE, buffer of IO_SIZE*2)
  log_file_name  path of the file to open
  errmsg         receives a static message describing the failure

  After the cache is set up the file header is validated with
  check_binlog_magic.
*/
File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
1098
/* Opened read-only. */
if ((file = my_open(log_file_name, O_RDONLY | O_BINARY | O_SHARE,
1101
sql_print_error("Failed to open log (file '%s', errno %d)",
1102
log_file_name, my_errno);
1103
*errmsg = "Could not open log file";
1106
if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
1107
MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
1109
sql_print_error("Failed to create a cache on log (file '%s')",
1111
/* Same text as for the open failure above. */
*errmsg = "Could not open log file";
1114
if (check_binlog_magic(log,errmsg))
1121
/* The descriptor is closed again here; presumably the error exit path
   -- confirm. */
my_close(file,MYF(0));
1129
Find a unique filename for 'filename.#'.
1131
Set '#' to a number as low as possible.
1134
nonzero if not possible to get unique filename
1137
/*
  Append a numeric suffix to name that is one higher than the largest
  numeric suffix found among matching entries of the directory of name.

  name  path to extend in place.
        NOTE(review): the suffix is written with sprintf and no bound is
        visible, so the caller must supply enough room -- confirm.
*/
static int find_uniq_filename(char *name)
1141
char buff[FN_REFLEN];
1142
struct st_my_dir *dir_info;
1143
/* NOTE(review): the register storage class was removed in C++17. */
register struct fileinfo *file_info;
1145
size_t buf_length, length;
1148
/* buff receives the directory part; start points at the file name part. */
length= dirname_part(buff, name, &buf_length);
1149
start= name + length;
1153
/* Number of leading bytes every candidate entry has to share. */
length= (size_t) (end-start+1);
1155
if (!(dir_info = my_dir(buff,MYF(MY_DONT_SORT))))
1156
{ // This shouldn't happen
1157
strmov(end,".1"); // use name+1
1160
file_info= dir_info->dir_entry;
1161
/* Visit every directory entry; the order does not matter (MY_DONT_SORT). */
for (i=dir_info->number_off_files ; i-- ; file_info++)
1163
/* An entry counts when its first length bytes match and the rest is
   accepted by test_if_number. */
if (bcmp((uchar*) file_info->name, (uchar*) start, length) == 0 &&
1164
test_if_number(file_info->name+length, &number,0))
1166
set_if_bigger(max_found,(ulong) number);
1169
my_dirend(dir_info);
1172
/* Suffix is written zero-padded to at least six digits.
   NOTE(review): max_found looks unsigned (see the ulong cast above) while
   the format is %ld -- confirm. */
sprintf(end,"%06ld",max_found+1);
1177
void MYSQL_LOG::init(enum_log_type log_type_arg,
1178
enum cache_type io_cache_type_arg)
1180
log_type= log_type_arg;
1181
io_cache_type= io_cache_type_arg;
1187
Open a (new) log file.
1192
log_name The name of the log to open
1193
log_type_arg The type of the log. E.g. LOG_NORMAL
1194
new_name The new name for the logfile. This is only needed
1195
when the method is used to open the binlog file.
1196
io_cache_type_arg The type of the IO_CACHE to use for this log file
1199
Open the logfile, init IO_CACHE and write startup messages
1200
(in case of general and slow query logs).
1207
/*
  Open the log file, attach an IO_CACHE to it and, for LOG_NORMAL logs,
  write a startup header.

  log_name           name of the log to open
  log_type_arg       log type, stored through init()
  new_name           when given, copied to log_file_name as the file name
  io_cache_type_arg  cache type, stored through init()

  log_state becomes LOG_OPENED on the success path. The failure path prints
  an error, closes the file, ends the cache and sets LOG_CLOSED.
*/
bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
1208
const char *new_name, enum cache_type io_cache_type_arg)
1210
char buff[FN_REFLEN];
1212
int open_flags= O_CREAT | O_BINARY;
1216
init(log_type_arg, io_cache_type_arg);
1218
/* When the copy fails, name is pointed at the argument string so that the
   error message can still print it. */
if (!(name= my_strdup(log_name, MYF(MY_WME))))
1220
name= (char *)log_name; // for the error message
1225
strmov(log_file_name, new_name);
1226
else if (generate_new_name(log_file_name, name))
1229
/* SEQ_READ_APPEND caches get O_RDWR|O_APPEND. The O_WRONLY line adds
   O_APPEND for every log type except LOG_BIN. */
if (io_cache_type == SEQ_READ_APPEND)
1230
open_flags |= O_RDWR | O_APPEND;
1232
open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
1236
/* The cache starts at the current file offset reported by my_tell;
   MY_WAIT_IF_FULL is added only for LOG_BIN. */
if ((file= my_open(log_file_name, open_flags,
1237
MYF(MY_WME | ME_WAITTANG))) < 0 ||
1238
init_io_cache(&log_file, file, IO_SIZE, io_cache_type,
1239
my_tell(file, MYF(MY_WME)), 0,
1240
MYF(MY_WME | MY_NABP |
1241
((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
1244
if (log_type == LOG_NORMAL)
1247
/* NOTE(review): snprintf returns the length the complete output would
   need. If that reaches sizeof(buff), buff + len points past the buffer
   and sizeof(buff) - len wraps around in the strnmov call below
   -- confirm and clamp if needed. */
int len=snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
1248
"started with:\nTCP Port: %d, Named Pipe: %s\n",
1249
my_progname, server_version, MYSQL_COMPILATION_COMMENT,
1252
end= strnmov(buff + len, "Time Id Command Argument\n",
1253
sizeof(buff) - len);
1254
/* The header is written and flushed right away. */
if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) ||
1255
flush_io_cache(&log_file))
1259
log_state= LOG_OPENED;
1263
sql_print_error("Could not use %s for logging (error %d). \
1264
Turning logging off for the whole duration of the MySQL server process. \
1265
To turn it on again: fix the cause, \
1266
shutdown the MySQL server and restart it.", name, errno);
1268
my_close(file, MYF(0));
1269
end_io_cache(&log_file);
1271
log_state= LOG_CLOSED;
1275
MYSQL_LOG::MYSQL_LOG()
1276
: name(0), write_error(false), inited(false), log_type(LOG_UNKNOWN),
1277
log_state(LOG_CLOSED)
1280
We don't want to initialize LOCK_Log here as such initialization depends on
1281
safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
1282
called only in main(). Doing initialization here would make it happen
1285
bzero((char*) &log_file, sizeof(log_file));
1288
void MYSQL_LOG::init_pthread_objects()
1290
assert(inited == 0);
1292
(void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
1300
exiting Bitmask. For the slow and general logs the only used bit is
1301
LOG_CLOSE_TO_BE_OPENED. This is used if we intend to call
1302
open at once after close.
1305
One can do an open on the object at once after doing a close.
1306
The internal structures are not freed until cleanup() is called
1309
/*
  Close the log file when it is open and update log_state.

  exiting  bitmask; when LOG_CLOSE_TO_BE_OPENED is set the state becomes
           LOG_TO_BE_OPENED, otherwise LOG_CLOSED.
*/
void MYSQL_LOG::close(uint exiting)
1310
{ // One can't set log_type here!
1311
if (log_state == LOG_OPENED)
1313
/* The cache is ended before the descriptor is synced and closed. */
end_io_cache(&log_file);
1315
/* Sync and close failures are reported only while write_error is unset. */
if (my_sync(log_file.file, MYF(MY_WME)) && ! write_error)
1318
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1321
if (my_close(log_file.file, MYF(MY_WME)) && ! write_error)
1324
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1328
log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
1333
/** This is called only once. */
1335
void MYSQL_LOG::cleanup()
1340
(void) pthread_mutex_destroy(&LOCK_log);
1347
/*
  Build the file name for this log in new_name.

  new_name  output buffer for the formatted name
  log_name  configured log name

  For LOG_BIN logs whose log_name has no extension, find_uniq_filename is
  called on new_name; an error is printed when it fails.
*/
int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name)
1349
/* NOTE(review): the literal 4 is the flag argument of fn_format;
   presumably one of the MY_* formatting flags -- confirm and use the
   named constant. */
fn_format(new_name, log_name, mysql_data_home, "", 4);
1350
if (log_type == LOG_BIN)
1352
/* fn_ext points at an empty string here, i.e. no extension was given. */
if (!fn_ext(log_name)[0])
1354
if (find_uniq_filename(new_name))
1356
sql_print_error(ER(ER_NO_UNIQUE_LOGFILE), log_name);
1372
Reopen the log file. The method is used during FLUSH LOGS
1373
and locks LOCK_log mutex
1377
/*
  Close and immediately reopen the log file under LOCK_log, keeping the
  same log type and cache type.
*/
void MYSQL_QUERY_LOG::reopen_file()
1386
pthread_mutex_lock(&LOCK_log);
1389
/* name is cleared before close(); the old string is passed to open() as
   save_name and released afterwards. */
name= 0; // Don't free name
1390
close(LOG_CLOSE_TO_BE_OPENED);
1393
Note that at this point, log_state != LOG_CLOSED (important for is_open()).
1396
open(save_name, log_type, 0, io_cache_type);
1397
my_free(save_name, MYF(0));
1399
pthread_mutex_unlock(&LOCK_log);
1406
Write a command to traditional general log file
1411
event_time command start timestamp
1412
user_host the pointer to the string with user@host info
1413
user_host_len length of the user_host string. this is computed once
1414
and passed to all general log event handlers
1415
thread_id Id of the thread, issued a query
1416
command_type the type of the command being logged
1417
command_type_len the length of the string above
1418
sql_text the very text of the query being executed
1419
sql_text_len the length of sql_text string
1423
Log given command to normal (not rotatable) log file
1427
TRUE - error occurred
1430
/*
  Write one general-log line to the log file while holding LOCK_log.

  The line consists of a timestamp (or two tabs), the thread id, the command
  type, a tab, the statement text and a newline; the cache is flushed after
  the newline. user_host and user_host_len are unused.
*/
bool MYSQL_QUERY_LOG::write(time_t event_time,
1431
const char *user_host __attribute__((__unused__)),
1432
uint user_host_len __attribute__((__unused__)),
1434
const char *command_type, uint command_type_len,
1435
const char *sql_text, uint sql_text_len)
1439
char local_time_buff[MAX_TIME_SIZE];
1441
uint time_buff_len= 0;
1443
(void) pthread_mutex_lock(&LOCK_log);
1445
/* Test if someone closed between the is_open test and lock */
1448
/* Note that my_b_write() assumes it knows the length for this */
1449
/* A timestamp is formatted only when event_time differs from the value
   remembered in last_time. */
if (event_time != last_time)
1451
last_time= event_time;
1453
localtime_r(&event_time, &start);
1455
time_buff_len= snprintf(local_time_buff, MAX_TIME_SIZE,
1456
"%02d%02d%02d %2d:%02d:%02d",
1457
start.tm_year % 100, start.tm_mon + 1,
1458
start.tm_mday, start.tm_hour,
1459
start.tm_min, start.tm_sec);
1461
if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
1465
/* Two tabs take the place of the timestamp.
   NOTE(review): every other my_b_write call in this function treats any
   non-zero result as failure, while this one tests for a negative result
   and may therefore never detect an error -- confirm the return
   convention of my_b_write. */
if (my_b_write(&log_file, (uchar*) "\t\t" ,2) < 0)
1468
/* command_type, thread_id */
1469
/* NOTE(review): the size is the literal 32; this assumes buff has at least
   that many bytes -- confirm against its declaration. */
length= snprintf(buff, 32, "%5ld ", (long) thread_id);
1471
if (my_b_write(&log_file, (uchar*) buff, length))
1474
if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
1477
if (my_b_write(&log_file, (uchar*) "\t", 1))
1481
if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
1484
/* The line is terminated and the cache flushed for every record. */
if (my_b_write(&log_file, (uchar*) "\n", 1) ||
1485
flush_io_cache(&log_file))
1489
(void) pthread_mutex_unlock(&LOCK_log);
1496
/* The write failure is reported, then the mutex is released below. */
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1498
(void) pthread_mutex_unlock(&LOCK_log);
1504
Log a query to the traditional slow log file
1509
thd THD of the query
1510
current_time current timestamp
1511
query_start_arg command start timestamp
1512
user_host the pointer to the string with user@host info
1513
user_host_len length of the user_host string. this is computed once
1514
and passed to all general log event handlers
1515
query_utime Amount of time the query took to execute (in microseconds)
1516
lock_utime Amount of time the query was locked (in microseconds)
1517
is_command The flag, which determines, whether the sql_text is a
1518
query or an administrator command.
1519
sql_text the very text of the query or administrator command
1521
sql_text_len the length of sql_text string
1525
Log a query to the slow log file.
1529
TRUE - error occurred
1532
bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
1533
time_t query_start_arg __attribute__((__unused__)),
1534
const char *user_host,
1535
uint user_host_len, uint64_t query_utime,
1536
uint64_t lock_utime, bool is_command,
1537
const char *sql_text, uint sql_text_len)
1541
(void) pthread_mutex_lock(&LOCK_log);
1545
(void) pthread_mutex_unlock(&LOCK_log);
1550
{ // Safety agains reopen
1552
char buff[80], *end;
1553
char query_time_buff[22+7], lock_time_buff[22+7];
1557
if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
1559
if (current_time != last_time)
1561
last_time= current_time;
1563
localtime_r(¤t_time, &start);
1565
buff_len= snprintf(buff, sizeof buff,
1566
"# Time: %02d%02d%02d %2d:%02d:%02d\n",
1567
start.tm_year % 100, start.tm_mon + 1,
1568
start.tm_mday, start.tm_hour,
1569
start.tm_min, start.tm_sec);
1571
/* Note that my_b_write() assumes it knows the length for this */
1572
if (my_b_write(&log_file, (uchar*) buff, buff_len))
1575
const uchar uh[]= "# User@Host: ";
1576
if (my_b_write(&log_file, uh, sizeof(uh) - 1))
1578
if (my_b_write(&log_file, (uchar*) user_host, user_host_len))
1580
if (my_b_write(&log_file, (uchar*) "\n", 1))
1583
/* For slow query log */
1584
sprintf(query_time_buff, "%.6f", uint64_t2double(query_utime)/1000000.0);
1585
sprintf(lock_time_buff, "%.6f", uint64_t2double(lock_utime)/1000000.0);
1586
if (my_b_printf(&log_file,
1587
"# Query_time: %s Lock_time: %s"
1588
" Rows_sent: %lu Rows_examined: %lu\n",
1589
query_time_buff, lock_time_buff,
1590
(ulong) thd->sent_row_count,
1591
(ulong) thd->examined_row_count) == (uint) -1)
1593
if (thd->db && strcmp(thd->db, db))
1594
{ // Database changed
1595
if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1)
1599
if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
1601
end=strmov(end, ",last_insert_id=");
1602
end=int64_t10_to_str((int64_t)
1603
thd->first_successful_insert_id_in_prev_stmt_for_binlog,
1606
// Save value if we do an insert.
1607
if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
1609
if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
1611
end=strmov(end,",insert_id=");
1612
end=int64_t10_to_str((int64_t)
1613
thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
1619
This info used to show up randomly, depending on whether the query
1620
checked the query start time or not. now we always write current
1621
timestamp to the slow log
1623
end= strmov(end, ",timestamp=");
1624
end= int10_to_str((long) current_time, end, 10);
1630
if (my_b_write(&log_file, (uchar*) "SET ", 4) ||
1631
my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff)))
1636
end= strxmov(buff, "# administrator command: ", NullS);
1637
buff_len= (ulong) (end - buff);
1638
my_b_write(&log_file, (uchar*) buff, buff_len);
1640
if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) ||
1641
my_b_write(&log_file, (uchar*) ";\n",2) ||
1642
flush_io_cache(&log_file))
1650
sql_print_error(ER(ER_ERROR_ON_WRITE), name, error);
1654
(void) pthread_mutex_unlock(&LOCK_log);
1661
The following should be using fn_format(); We just need to
1662
first change fn_format() to cut the file name if it's too long.
1664
const char *MYSQL_LOG::generate_name(const char *log_name,
1666
bool strip_ext, char *buff)
1668
if (!log_name || !log_name[0])
1670
strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
1671
return (const char *)
1672
fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
1674
// get rid of extension if the log is binary to avoid problems
1677
char *p= fn_ext(log_name);
1678
uint length= (uint) (p - log_name);
1679
strmake(buff, log_name, min(length, FN_REFLEN));
1680
return (const char*)buff;
1687
/**
  Construct a binary log object in its "nothing opened yet" state.

  Only plain data members are set up here. Mutexes and the condition
  variable are deliberately left alone: their initialization depends on
  safe_mutex (when it is in use), which depends on MY_INIT(), which is
  called only in main(). They are created by init_pthread_objects().
*/
MYSQL_BIN_LOG::MYSQL_BIN_LOG()
  : bytes_written(0),
    prepared_xids(0),
    file_id(1),
    open_count(1),
    need_start_event(true),
    m_table_map_version(0),
    description_event_for_exec(0),
    description_event_for_queue(0)
{
  /* No index file is attached yet: clear its cache and its name. */
  bzero((char*) &index_file, sizeof(index_file));
  index_file_name[0]= '\0';
}
1702
/* this is called only once */
1704
void MYSQL_BIN_LOG::cleanup()
1709
close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
1710
delete description_event_for_queue;
1711
delete description_event_for_exec;
1712
(void) pthread_mutex_destroy(&LOCK_log);
1713
(void) pthread_mutex_destroy(&LOCK_index);
1714
(void) pthread_cond_destroy(&update_cond);
1720
/* Init binlog-specific vars */
1721
void MYSQL_BIN_LOG::init(bool no_auto_events_arg, ulong max_size_arg)
1723
no_auto_events= no_auto_events_arg;
1724
max_size= max_size_arg;
1729
void MYSQL_BIN_LOG::init_pthread_objects()
1731
assert(inited == 0);
1733
(void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
1734
(void) pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW);
1735
(void) pthread_cond_init(&update_cond, 0);
1739
bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
1740
const char *log_name)
1742
File index_file_nr= -1;
1743
assert(!my_b_inited(&index_file));
1746
First open of this class instance
1747
Create an index file that will hold all file names uses for logging.
1748
Add new entries to the end of it.
1750
myf opt= MY_UNPACK_FILENAME;
1751
if (!index_file_name_arg)
1753
index_file_name_arg= log_name; // Use same basename for index file
1754
opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
1756
fn_format(index_file_name, index_file_name_arg, mysql_data_home,
1758
if ((index_file_nr= my_open(index_file_name,
1759
O_RDWR | O_CREAT | O_BINARY ,
1760
MYF(MY_WME))) < 0 ||
1761
my_sync(index_file_nr, MYF(MY_WME)) ||
1762
init_io_cache(&index_file, index_file_nr,
1763
IO_SIZE, WRITE_CACHE,
1764
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
1765
0, MYF(MY_WME | MY_WAIT_IF_FULL)))
1768
TODO: all operations creating/deleting the index file or a log, should
1769
call my_sync_dir() or my_sync_dir_by_file() to be durable.
1770
TODO: file creation should be done with my_create() not my_open().
1772
if (index_file_nr >= 0)
1773
my_close(index_file_nr,MYF(0));
1781
Open a (new) binlog file.
1783
- Open the log file and the index file. Register the new
1785
- When calling this while the file is in use, you must hold locks
1786
on LOCK_log and LOCK_index.
1794
bool MYSQL_BIN_LOG::open(const char *log_name,
1795
enum_log_type log_type_arg,
1796
const char *new_name,
1797
enum cache_type io_cache_type_arg,
1798
bool no_auto_events_arg,
1800
bool null_created_arg)
1806
/* open the main log file */
1807
if (MYSQL_LOG::open(log_name, log_type_arg, new_name, io_cache_type_arg))
1808
return(1); /* all warnings issued */
1810
init(no_auto_events_arg, max_size_arg);
1814
assert(log_type == LOG_BIN);
1817
bool write_file_name_to_index_file=0;
1819
if (!my_b_filelength(&log_file))
1822
The binary log file was empty (probably newly created)
1823
This is the normal case and happens when the user doesn't specify
1824
an extension for the binary log files.
1825
In this case we write a standard header to it.
1827
if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
1828
BIN_LOG_HEADER_SIZE))
1830
bytes_written+= BIN_LOG_HEADER_SIZE;
1831
write_file_name_to_index_file= 1;
1834
assert(my_b_inited(&index_file) != 0);
1835
reinit_io_cache(&index_file, WRITE_CACHE,
1836
my_b_filelength(&index_file), 0, 0);
1837
if (need_start_event && !no_auto_events)
1840
In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event
1841
even if this is not the very first binlog.
1843
Format_description_log_event s(BINLOG_VERSION);
1845
don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
1846
as we won't be able to reset it later
1848
if (io_cache_type == WRITE_CACHE)
1849
s.flags|= LOG_EVENT_BINLOG_IN_USE_F;
1852
s.dont_set_created= null_created_arg;
1853
if (s.write(&log_file))
1855
bytes_written+= s.data_written;
1857
if (description_event_for_queue &&
1858
description_event_for_queue->binlog_version>=4)
1861
This is a relay log written to by the I/O slave thread.
1862
Write the event so that others can later know the format of this relay
1864
Note that this event is very close to the original event from the
1865
master (it has binlog version of the master, event types of the
1866
master), so this is suitable to parse the next relay log's event. It
1867
has been produced by
1868
Format_description_log_event::Format_description_log_event(char* buf,).
1869
Why don't we want to write the description_event_for_queue if this
1870
event is for format<4 (3.23 or 4.x): this is because in that case, the
1871
description_event_for_queue describes the data received from the
1872
master, but not the data written to the relay log (*conversion*),
1873
which is in format 4 (slave's).
1876
Set 'created' to 0, so that in next relay logs this event does not
1877
trigger cleaning actions on the slave in
1878
Format_description_log_event::apply_event_impl().
1880
description_event_for_queue->created= 0;
1881
/* Don't set log_pos in event header */
1882
description_event_for_queue->artificial_event=1;
1884
if (description_event_for_queue->write(&log_file))
1886
bytes_written+= description_event_for_queue->data_written;
1888
if (flush_io_cache(&log_file) ||
1889
my_sync(log_file.file, MYF(MY_WME)))
1892
if (write_file_name_to_index_file)
1895
As this is a new log file, we write the file name to the index
1896
file. As every time we write to the index file, we sync it.
1898
if (my_b_write(&index_file, (uchar*) log_file_name,
1899
strlen(log_file_name)) ||
1900
my_b_write(&index_file, (uchar*) "\n", 1) ||
1901
flush_io_cache(&index_file) ||
1902
my_sync(index_file.file, MYF(MY_WME)))
1906
log_state= LOG_OPENED;
1911
sql_print_error("Could not use %s for logging (error %d). \
1912
Turning logging off for the whole duration of the MySQL server process. \
1913
To turn it on again: fix the cause, \
1914
shutdown the MySQL server and restart it.", name, errno);
1916
my_close(file,MYF(0));
1917
end_io_cache(&log_file);
1918
end_io_cache(&index_file);
1920
log_state= LOG_CLOSED;
1925
/**
  Report the name and write position of the current log file.

  Takes LOCK_log around raw_get_current_log() so the name and the position
  are read together.

  @param linfo  receives the log file name and position

  @return whatever raw_get_current_log() returns
*/
int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
{
  pthread_mutex_lock(&LOCK_log);
  const int result= raw_get_current_log(linfo);
  pthread_mutex_unlock(&LOCK_log);
  return result;
}
1933
/**
  Copy the current log file name and position into linfo.

  Does no locking of its own; get_current_log() is the variant that takes
  LOCK_log around this call.

  @param linfo  receives log_file_name (truncated to fit) and the position
                reported by my_b_tell() for the log file

  @return 0
*/
int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
{
  const size_t name_capacity= sizeof(linfo->log_file_name);

  linfo->pos= my_b_tell(&log_file);
  strmake(linfo->log_file_name, log_file_name, name_capacity - 1);
  return 0;
}
1941
Move all data up in a filename index file.
1943
We do the copy outside of the IO_CACHE as the cache buffers would just
1944
make things slower and more complicated.
1945
In most cases the copy loop should only do one read.
1947
@param index_file File to move
1948
@param offset Move everything from here to beginning
1951
File will be truncated to be 'offset' shorter or filled up with newlines
1957
#ifdef HAVE_REPLICATION
1959
static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
1962
my_off_t init_offset= offset;
1963
File file= index_file->file;
1964
uchar io_buf[IO_SIZE*2];
1966
for (;; offset+= bytes_read)
1968
(void) my_seek(file, offset, MY_SEEK_SET, MYF(0));
1969
if ((bytes_read= (int) my_read(file, io_buf, sizeof(io_buf), MYF(MY_WME)))
1973
break; // end of file
1974
(void) my_seek(file, offset-init_offset, MY_SEEK_SET, MYF(0));
1975
if (my_write(file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
1978
/* The following will either truncate the file or fill the end with \n' */
1979
if (ftruncate(file, offset - init_offset) || my_sync(file, MYF(MY_WME)))
1982
/* Reset data in old index cache */
1983
reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
1990
#endif /* HAVE_REPLICATION */
1993
Find the position in the log-index-file for the given log name.
1995
@param linfo Store here the found log file name and position to
1996
the NEXT log file name in the index file.
1997
@param log_name Filename to find in the index file.
1998
Is a null pointer if we want to read the first entry
1999
@param need_lock Set this to 1 if the parent doesn't already have a
2003
On systems without the truncate function the file will end with one or
2004
more empty lines. These will be ignored when reading the file.
2009
LOG_INFO_EOF End of log-index-file found
2011
LOG_INFO_IO Got IO error while reading file
2014
int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
2018
char *fname= linfo->log_file_name;
2019
uint log_name_len= log_name ? (uint) strlen(log_name) : 0;
2022
Mutex needed because we need to make sure the file pointer does not
2023
move from under our feet
2026
pthread_mutex_lock(&LOCK_index);
2027
safe_mutex_assert_owner(&LOCK_index);
2029
/* As the file is flushed, we can't get an error here */
2030
(void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
2035
my_off_t offset= my_b_tell(&index_file);
2036
/* If we get 0 or 1 characters, this is the end of the file */
2038
if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
2040
/* Did not find the given entry; Return not found or error */
2041
error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
2045
// if the log entry matches, null string matching anything
2047
(log_name_len == length-1 && fname[log_name_len] == '\n' &&
2048
!memcmp(fname, log_name, log_name_len)))
2050
fname[length-1]=0; // remove last \n
2051
linfo->index_file_start_offset= offset;
2052
linfo->index_file_offset = my_b_tell(&index_file);
2058
pthread_mutex_unlock(&LOCK_index);
2064
Find the position in the log-index-file for the given log name.
2067
linfo Store here the next log file name and position to
2068
the file name after that.
2070
need_lock Set this to 1 if the parent doesn't already have a
2074
- Before calling this function, one has to call find_log_pos()
2076
- Mutex needed because we need to make sure the file pointer does not move
2082
LOG_INFO_EOF End of log-index-file found
2084
LOG_INFO_IO Got IO error while reading file
2087
int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
2091
char *fname= linfo->log_file_name;
2094
pthread_mutex_lock(&LOCK_index);
2095
safe_mutex_assert_owner(&LOCK_index);
2097
/* As the file is flushed, we can't get an error here */
2098
(void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
2101
linfo->index_file_start_offset= linfo->index_file_offset;
2102
if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
2104
error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
2107
fname[length-1]=0; // kill \n
2108
linfo->index_file_offset = my_b_tell(&index_file);
2112
pthread_mutex_unlock(&LOCK_index);
2118
Delete all logs referred to in the index file.
2119
Start writing to a new log file.
2121
The new index file will only contain this file.
2126
If not called from slave thread, write start event to new log
2134
bool MYSQL_BIN_LOG::reset_logs(THD* thd)
2138
const char* save_name;
2142
We need to get both locks to be sure that no one is trying to
2143
write to the index log file.
2145
pthread_mutex_lock(&LOCK_log);
2146
pthread_mutex_lock(&LOCK_index);
2149
The following mutex is needed to ensure that no threads call
2150
'delete thd' as we would then risk missing a 'rollback' from this
2151
thread. If the transaction involved MyISAM tables, it should go
2152
into binlog even on rollback.
2154
VOID(pthread_mutex_lock(&LOCK_thread_count));
2156
/* Save variables so that we can reopen the log */
2158
name=0; // Protect against free
2159
close(LOG_CLOSE_TO_BE_OPENED);
2161
/* First delete all old log files */
2163
if (find_log_pos(&linfo, NullS, 0))
2171
if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
2173
if (my_errno == ENOENT)
2175
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2176
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2177
linfo.log_file_name);
2178
sql_print_information("Failed to delete file '%s'",
2179
linfo.log_file_name);
2185
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2186
ER_BINLOG_PURGE_FATAL_ERR,
2187
"a problem with deleting %s; "
2188
"consider examining correspondence "
2189
"of your binlog index file "
2190
"to the actual binlog files",
2191
linfo.log_file_name);
2196
if (find_next_log(&linfo, 0))
2200
/* Start logging with a new file */
2201
close(LOG_CLOSE_INDEX);
2202
if ((error= my_delete_allow_opened(index_file_name, MYF(0)))) // Reset (open will update)
2204
if (my_errno == ENOENT)
2206
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2207
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2209
sql_print_information("Failed to delete file '%s'",
2216
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2217
ER_BINLOG_PURGE_FATAL_ERR,
2218
"a problem with deleting %s; "
2219
"consider examining correspondence "
2220
"of your binlog index file "
2221
"to the actual binlog files",
2227
if (!thd->slave_thread)
2229
if (!open_index_file(index_file_name, 0))
2230
open(save_name, log_type, 0, io_cache_type, no_auto_events, max_size, 0);
2231
my_free((uchar*) save_name, MYF(0));
2234
VOID(pthread_mutex_unlock(&LOCK_thread_count));
2235
pthread_mutex_unlock(&LOCK_index);
2236
pthread_mutex_unlock(&LOCK_log);
2242
Delete relay log files prior to rli->group_relay_log_name
2243
(i.e. all logs which are not involved in a non-finished group
2244
(transaction)), remove them from the index file and start on next
2248
- Protects index file with LOCK_index
2249
- Delete relevant relay log files
2250
- Copy all file names after these ones to the front of the index file
2251
- If the OS has truncate, truncate the file, else fill it with '\n'
2252
- Read the next file name from the index file and store in rli->linfo
2254
@param rli Relay log information
2255
@param included If false, all relay logs that are strictly before
2256
rli->group_relay_log_name are deleted ; if true, the
2257
latter is deleted too (i.e. all relay logs
2258
read by the SQL slave thread are deleted).
2261
- This is only called from the slave-execute thread when it has read
2262
all commands from a relay log and want to switch to a new relay log.
2263
- When this happens, we can be in an active transaction as
2264
a transaction can span over two relay logs
2265
(although it is always written as a single block to the master's binary
2266
log, hence cannot span over two master's binary logs).
2271
LOG_INFO_EOF End of log-index-file found
2273
LOG_INFO_SEEK Could not allocate IO cache
2275
LOG_INFO_IO Got IO error while reading file
2278
#ifdef HAVE_REPLICATION
2280
int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
2285
assert(rli->slave_running == 1);
2286
assert(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
2288
pthread_mutex_lock(&LOCK_index);
2289
pthread_mutex_lock(&rli->log_space_lock);
2290
rli->relay_log.purge_logs(rli->group_relay_log_name, included,
2291
0, 0, &rli->log_space_total);
2292
// Tell the I/O thread to take the relay_log_space_limit into account
2293
rli->ignore_log_space_limit= 0;
2294
pthread_mutex_unlock(&rli->log_space_lock);
2297
Ok to broadcast after the critical region as there is no risk of
2298
the mutex being destroyed by this thread later - this helps save
2301
pthread_cond_broadcast(&rli->log_space_cond);
2304
Read the next log file name from the index file and pass it back to
2306
If included is true, we want the first relay log;
2307
otherwise we want the one after event_relay_log_name.
2309
if ((included && (error=find_log_pos(&rli->linfo, NullS, 0))) ||
2311
((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
2312
(error=find_next_log(&rli->linfo, 0)))))
2315
sql_print_error("next log error: %d offset: %s log: %s included: %d",
2317
llstr(rli->linfo.index_file_offset,buff),
2318
rli->group_relay_log_name,
2324
Reset rli's coordinates to the current log.
2326
rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
2327
strmake(rli->event_relay_log_name,rli->linfo.log_file_name,
2328
sizeof(rli->event_relay_log_name)-1);
2331
If we removed the rli->group_relay_log_name file,
2332
we must update the rli->group* coordinates, otherwise do not touch it as the
2333
group's execution is not finished (e.g. COMMIT not executed)
2337
rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
2338
strmake(rli->group_relay_log_name,rli->linfo.log_file_name,
2339
sizeof(rli->group_relay_log_name)-1);
2340
rli->notify_group_relay_log_name_update();
2343
/* Store where we are in the new file for the execution thread */
2344
flush_relay_log_info(rli);
2347
pthread_mutex_unlock(&LOCK_index);
2352
Update log index_file.
2355
int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
2357
if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
2360
// now update offsets in index file for running threads
2361
if (need_update_threads)
2362
adjust_linfo_offsets(log_info->index_file_start_offset);
2367
Remove all logs before the given log from disk and from the index file.
2369
@param to_log Delete all log files before this file.
2370
@param included If true, to_log is deleted too.
2372
@param need_update_threads If we want to update the log coordinates of
2373
all threads. False for relay logs, true otherwise.
2374
@param decrease_log_space If not null, decrement this variable by
2375
the amount of log space freed
2378
If any of the logs before the deleted one is in use,
2379
only purge logs up to this one.
2384
LOG_INFO_EOF to_log not found
2385
LOG_INFO_EMFILE too many files opened
2386
LOG_INFO_FATAL if any other than ENOENT error from
2387
stat() or my_delete()
2390
int MYSQL_BIN_LOG::purge_logs(const char *to_log,
2393
bool need_update_threads,
2394
uint64_t *decrease_log_space)
2402
pthread_mutex_lock(&LOCK_index);
2403
if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
2407
File name exists in index file; delete until we find this file
2408
or a file that is used.
2410
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
2412
while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
2413
!log_in_use(log_info.log_file_name))
2416
if (stat(log_info.log_file_name, &s))
2418
if (errno == ENOENT)
2421
It's not fatal if we can't stat a log file that does not exist;
2422
If we could not stat, we won't delete.
2424
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2425
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2426
log_info.log_file_name);
2427
sql_print_information("Failed to execute stat on file '%s'",
2428
log_info.log_file_name);
2434
Other than ENOENT are fatal
2436
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2437
ER_BINLOG_PURGE_FATAL_ERR,
2438
"a problem with getting info on being purged %s; "
2439
"consider examining correspondence "
2440
"of your binlog index file "
2441
"to the actual binlog files",
2442
log_info.log_file_name);
2443
error= LOG_INFO_FATAL;
2449
if (!my_delete(log_info.log_file_name, MYF(0)))
2451
if (decrease_log_space)
2452
*decrease_log_space-= s.st_size;
2456
if (my_errno == ENOENT)
2458
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2459
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2460
log_info.log_file_name);
2461
sql_print_information("Failed to delete file '%s'",
2462
log_info.log_file_name);
2467
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2468
ER_BINLOG_PURGE_FATAL_ERR,
2469
"a problem with deleting %s; "
2470
"consider examining correspondence "
2471
"of your binlog index file "
2472
"to the actual binlog files",
2473
log_info.log_file_name);
2474
if (my_errno == EMFILE)
2476
error= LOG_INFO_EMFILE;
2478
error= LOG_INFO_FATAL;
2484
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
2486
if (find_next_log(&log_info, 0) || exit_loop)
2491
If we get killed -9 here, the sysadmin would have to edit
2492
the log index file after restart - otherwise, this should be safe
2494
error= update_log_index(&log_info, need_update_threads);
2501
pthread_mutex_unlock(&LOCK_index);
2506
Remove all logs before the given file date from disk and from the
2509
@param thd Thread pointer
2510
@param before_date Delete all log files before given date.
2513
If any of the logs before the deleted one is in use,
2514
only purge logs up to this one.
2519
LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
2520
LOG_INFO_FATAL if any other than ENOENT error from
2521
stat() or my_delete()
2524
int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
2528
struct stat stat_area;
2530
pthread_mutex_lock(&LOCK_index);
2533
Delete until we find curren file
2534
or a file that is used or a file
2535
that is older than purge_time.
2537
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
2540
while (strcmp(log_file_name, log_info.log_file_name) &&
2541
!log_in_use(log_info.log_file_name))
2543
if (stat(log_info.log_file_name, &stat_area))
2545
if (errno == ENOENT)
2548
It's not fatal if we can't stat a log file that does not exist.
2550
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2551
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2552
log_info.log_file_name);
2553
sql_print_information("Failed to execute stat on file '%s'",
2554
log_info.log_file_name);
2560
Other than ENOENT are fatal
2562
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2563
ER_BINLOG_PURGE_FATAL_ERR,
2564
"a problem with getting info on being purged %s; "
2565
"consider examining correspondence "
2566
"of your binlog index file "
2567
"to the actual binlog files",
2568
log_info.log_file_name);
2569
error= LOG_INFO_FATAL;
2575
if (stat_area.st_mtime >= purge_time)
2577
if (my_delete(log_info.log_file_name, MYF(0)))
2579
if (my_errno == ENOENT)
2581
/* It's not fatal even if we can't delete a log file */
2582
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
2583
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2584
log_info.log_file_name);
2585
sql_print_information("Failed to delete file '%s'",
2586
log_info.log_file_name);
2591
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
2592
ER_BINLOG_PURGE_FATAL_ERR,
2593
"a problem with deleting %s; "
2594
"consider examining correspondence "
2595
"of your binlog index file "
2596
"to the actual binlog files",
2597
log_info.log_file_name);
2598
error= LOG_INFO_FATAL;
2602
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
2604
if (find_next_log(&log_info, 0))
2609
If we get killed -9 here, the sysadmin would have to edit
2610
the log index file after restart - otherwise, this should be safe
2612
error= update_log_index(&log_info, 1);
2615
pthread_mutex_unlock(&LOCK_index);
2618
#endif /* HAVE_REPLICATION */
2622
Create a new log file name.
2624
@param buf buf of at least FN_REFLEN where new name is stored
2627
If the file name would be longer than FN_REFLEN it will be truncated
2630
void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
2632
uint dir_len = dirname_length(log_file_name);
2633
if (dir_len >= FN_REFLEN)
2634
dir_len=FN_REFLEN-1;
2635
strnmov(buf, log_file_name, dir_len);
2636
strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
2641
Check if we are writing/reading to the given log file.
2644
bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
2646
return !strcmp(log_file_name, log_file_name_arg);
2651
Wrappers around new_file_impl to avoid using argument
2652
to control locking. The argument 1) less readable 2) breaks
2653
encapsulation 3) allows external access to the class without
2654
a lock (which is not possible with private new_file_without_locking
2658
void MYSQL_BIN_LOG::new_file()
2664
void MYSQL_BIN_LOG::new_file_without_locking()
2671
Start writing to a new log file or reopen the old file.
2673
@param need_lock Set to 1 if caller has not locked LOCK_log
2676
The new file name is stored last in the index file
2679
void MYSQL_BIN_LOG::new_file_impl(bool need_lock)
2681
char new_name[FN_REFLEN], *new_name_ptr, *old_name;
2689
pthread_mutex_lock(&LOCK_log);
2690
pthread_mutex_lock(&LOCK_index);
2692
safe_mutex_assert_owner(&LOCK_log);
2693
safe_mutex_assert_owner(&LOCK_index);
2696
if binlog is used as tc log, be sure all xids are "unlogged",
2697
so that on recover we only need to scan one - latest - binlog file
2698
for prepared xids. As this is expected to be a rare event,
2699
simple wait strategy is enough. We're locking LOCK_log to be sure no
2700
new Xid_log_event's are added to the log (and prepared_xids is not
2701
increased), and waiting on COND_prep_xids for late threads to
2706
tc_log_page_waits++;
2707
pthread_mutex_lock(&LOCK_prep_xids);
2708
while (prepared_xids) {
2709
pthread_cond_wait(&COND_prep_xids, &LOCK_prep_xids);
2711
pthread_mutex_unlock(&LOCK_prep_xids);
2714
/* Reuse old name if not binlog and not update log */
2718
If user hasn't specified an extension, generate a new log name
2719
We have to do this here and not in open as we want to store the
2720
new file name in the current binary log file.
2722
if (generate_new_name(new_name, name))
2724
new_name_ptr=new_name;
2726
if (log_type == LOG_BIN)
2728
if (!no_auto_events)
2731
We log the whole file name for log file as the user may decide
2732
to change base names at some point.
2734
Rotate_log_event r(new_name+dirname_length(new_name),
2735
0, LOG_EVENT_OFFSET, 0);
2737
bytes_written += r.data_written;
2740
Update needs to be signalled even if there is no rotate event
2741
log rotation should give the waiting thread a signal to
2742
discover EOF and move on to the next log.
2747
name=0; // Don't free name
2748
close(LOG_CLOSE_TO_BE_OPENED);
2751
Note that at this point, log_state != LOG_CLOSED (important for is_open()).
2755
new_file() is only used for rotation (in FLUSH LOGS or because size >
2756
max_binlog_size or max_relay_log_size).
2757
If this is a binary log, the Format_description_log_event at the beginning of
2758
the new file should have created=0 (to distinguish with the
2759
Format_description_log_event written at server startup, which should
2760
trigger temp tables deletion on slaves.
2763
open(old_name, log_type, new_name_ptr,
2764
io_cache_type, no_auto_events, max_size, 1);
2765
my_free(old_name,MYF(0));
2769
pthread_mutex_unlock(&LOCK_log);
2770
pthread_mutex_unlock(&LOCK_index);
2776
bool MYSQL_BIN_LOG::append(Log_event* ev)
{
  /*
    Append one event to a log whose IO_CACHE is in SEQ_READ_APPEND mode,
    account for the bytes written and rotate the log once it has grown
    past max_size. LOCK_log is taken and released inside this function.

    Returns 0 on success, 1 if the event could not be written.
  */
  bool failed= 0;

  pthread_mutex_lock(&LOCK_log);
  assert(log_file.type == SEQ_READ_APPEND);

  /*
    Log_event::write() picks my_b_write() or my_b_append() on its own,
    depending on the kind of cache it is handed.
  */
  if (ev->write(&log_file))
    failed= 1;
  else
  {
    bytes_written+= ev->data_written;
    if ((uint) my_b_append_tell(&log_file) > max_size)
      new_file_without_locking();
  }

  pthread_mutex_unlock(&LOCK_log);
  signal_update();				// Safe as we don't call close
  return failed;
}
2802
bool MYSQL_BIN_LOG::appendv(const char* buf, uint len,...)
2808
assert(log_file.type == SEQ_READ_APPEND);
2810
safe_mutex_assert_owner(&LOCK_log);
2813
if (my_b_append(&log_file,(uchar*) buf,len))
2818
bytes_written += len;
2819
} while ((buf=va_arg(args,const char*)) && (len=va_arg(args,uint)));
2820
if ((uint) my_b_append_tell(&log_file) > max_size)
2821
new_file_without_locking();
2830
bool MYSQL_BIN_LOG::flush_and_sync()
2832
int err=0, fd=log_file.file;
2833
safe_mutex_assert_owner(&LOCK_log);
2834
if (flush_io_cache(&log_file))
2836
if (++sync_binlog_counter >= sync_binlog_period && sync_binlog_period)
2838
sync_binlog_counter= 0;
2839
err=my_sync(fd, MYF(MY_WME));
2844
void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
{
  /* A union must not already be in progress for this thread. */
  assert(!thd->binlog_evt_union.do_union);

  /*
    Record the first query id that belongs to the union, clear both
    "an event was seen" flags and finally switch union mode on.
  */
  thd->binlog_evt_union.first_query_id= query_id_param;
  thd->binlog_evt_union.unioned_events_trans= false;
  thd->binlog_evt_union.unioned_events= false;
  thd->binlog_evt_union.do_union= true;
}
2853
void MYSQL_BIN_LOG::stop_union_events(THD *thd)
{
  /* Only valid while a union is active; this switches union mode off. */
  assert(thd->binlog_evt_union.do_union);

  thd->binlog_evt_union.do_union= false;
}
2859
bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
{
  /* No union is active for this thread: nothing can belong to one. */
  if (!thd->binlog_evt_union.do_union)
    return false;

  /* A query is part of the union if its id is not older than the first one. */
  return query_id_param >= thd->binlog_evt_union.first_query_id;
}
2867
These functions are placed in this file since they need access to
2868
binlog_hton, which has internal linkage.
2871
int THD::binlog_setup_trx_data()
2873
binlog_trx_data *trx_data=
2874
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2877
return(0); // Already set up
2879
trx_data= (binlog_trx_data*) my_malloc(sizeof(binlog_trx_data), MYF(MY_ZEROFILL));
2881
open_cached_file(&trx_data->trans_log, mysql_tmpdir,
2882
LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
2884
my_free((uchar*)trx_data, MYF(MY_ALLOW_ZERO_PTR));
2885
return(1); // Didn't manage to set it up
2887
thd_set_ha_data(this, binlog_hton, trx_data);
2889
trx_data= new (thd_get_ha_data(this, binlog_hton)) binlog_trx_data;
2895
Function to start a statement and optionally a transaction for the
2899
binlog_start_trans_and_stmt()
2903
This function does three things:
2904
- Start a transaction if not in autocommit mode or if a BEGIN
2905
statement has been seen.
2907
- Start a statement transaction to allow us to truncate the binary
2910
- Save the current binlog position so that we can roll back the
2911
statement by truncating the transaction log.
2913
We only update the saved position if the old one was undefined,
2914
the reason is that there are some cases (e.g., for CREATE-SELECT)
2915
where the position is saved twice (e.g., both in
2916
select_create::prepare() and THD::binlog_write_table_map()) , but
2917
we should use the first. This means that calls to this function
2918
can be used to start the statement before the first table map
2919
event, to include some extra events.
2923
THD::binlog_start_trans_and_stmt()
2925
binlog_trx_data *trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2927
if (trx_data == NULL ||
2928
trx_data->before_stmt_pos == MY_OFF_T_UNDEF)
2930
this->binlog_set_stmt_begin();
2931
if (options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
2932
trans_register_ha(this, true, binlog_hton);
2933
trans_register_ha(this, false, binlog_hton);
2935
Mark statement transaction as read/write. We never start
2936
a binary log transaction and keep it read-only,
2937
therefore it's best to mark the transaction read/write just
2938
at the same time we start it.
2939
Not necessary to mark the normal transaction read/write
2940
since the statement-level flag will be propagated automatically
2941
inside ha_commit_trans.
2943
ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
2948
void THD::binlog_set_stmt_begin() {
2949
binlog_trx_data *trx_data=
2950
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2953
The call to binlog_trans_log_savepos() might create the trx_data
2954
structure, if it didn't exist before, so we save the position
2955
into an auto variable and then write it into the transaction
2956
data for the binary log (i.e., trx_data).
2959
binlog_trans_log_savepos(this, &pos);
2960
trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2961
trx_data->before_stmt_pos= pos;
2966
Write a table map to the binary log.
2969
int THD::binlog_write_table_map(TABLE *table, bool is_trans)
2973
/* Pre-conditions */
2974
assert(current_stmt_binlog_row_based && mysql_bin_log.is_open());
2975
assert(table->s->table_map_id != ULONG_MAX);
2977
Table_map_log_event::flag_set const
2978
flags= Table_map_log_event::TM_NO_FLAGS;
2981
the_event(this, table, table->s->table_map_id, is_trans, flags);
2983
if (is_trans && binlog_table_maps == 0)
2984
binlog_start_trans_and_stmt();
2986
if ((error= mysql_bin_log.write(&the_event)))
2989
binlog_table_maps++;
2990
table->s->table_map_version= mysql_bin_log.table_map_version();
2995
THD::binlog_get_pending_rows_event() const
2997
binlog_trx_data *const trx_data=
2998
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3000
This is less than ideal, but here's the story: If there is no
3001
trx_data, prepare_pending_rows_event() has never been called
3002
(since the trx_data is set up there). In that case, we just return
3005
return trx_data ? trx_data->pending() : NULL;
3009
THD::binlog_set_pending_rows_event(Rows_log_event* ev)
3011
if (thd_get_ha_data(this, binlog_hton) == NULL)
3012
binlog_setup_trx_data();
3014
binlog_trx_data *const trx_data=
3015
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3018
trx_data->set_pending(ev);
3023
Moves the last bunch of rows from the pending Rows event to the binlog
3024
(either cached binlog if transaction, or disk binlog). Sets a new pending
3028
MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
3029
Rows_log_event* event)
3031
assert(mysql_bin_log.is_open());
3035
binlog_trx_data *const trx_data=
3036
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
3040
if (Rows_log_event* pending= trx_data->pending())
3042
IO_CACHE *file= &log_file;
3045
Decide if we should write to the log file directly or to the
3048
if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log))
3049
file= &trx_data->trans_log;
3052
If we are writing to the log file directly, we could avoid
3053
locking the log. This does not work since we need to step the
3054
m_table_map_version below, and that change has to be protected
3055
by the LOCK_log mutex.
3057
pthread_mutex_lock(&LOCK_log);
3060
Write pending event to log file or transaction cache
3062
if (pending->write(file))
3064
pthread_mutex_unlock(&LOCK_log);
3069
We step the table map version if we are writing an event
3070
representing the end of a statement. We do this regardless of
3071
whether we write to the transaction cache or directly to the
3074
In an ideal world, we could avoid stepping the table map version
3075
if we were writing to a transaction cache, since we could then
3076
reuse the table map that was written earlier in the transaction
3077
cache. This does not work since STMT_END_F implies closing all
3078
table mappings on the slave side.
3080
TODO: Find a solution so that table maps do not have to be
3081
written several times within a transaction.
3083
if (pending->get_flags(Rows_log_event::STMT_END_F))
3084
++m_table_map_version;
3088
if (file == &log_file)
3090
error= flush_and_sync();
3094
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3098
pthread_mutex_unlock(&LOCK_log);
3101
thd->binlog_set_pending_rows_event(event);
3107
Write an event to the binary log.
3110
bool MYSQL_BIN_LOG::write(Log_event *event_info)
3112
THD *thd= event_info->thd;
3115
if (thd->binlog_evt_union.do_union)
3118
In Stored function; Remember that function call caused an update.
3119
We will log the function call to the binary log on function exit
3121
thd->binlog_evt_union.unioned_events= true;
3122
thd->binlog_evt_union.unioned_events_trans |= event_info->cache_stmt;
3127
Flush the pending rows event to the transaction cache or to the
3128
log file. Since this function potentially acquires the LOCK_log
3129
mutex, we do this before acquiring the LOCK_log mutex in this
3132
We only end the statement if we are in a top-level statement. If
3133
we are inside a stored function, we do not end the statement since
3134
this will close all tables on the slave.
3136
bool const end_stmt= false;
3137
thd->binlog_flush_pending_rows_event(end_stmt);
3139
pthread_mutex_lock(&LOCK_log);
3142
In most cases this is only called if 'is_open()' is true; in fact this is
3143
mostly called if is_open() *was* true a few instructions before, but it
3144
could have changed since.
3146
if (likely(is_open()))
3148
IO_CACHE *file= &log_file;
3150
In the future we need to add to the following if tests like
3151
"do the involved tables match (to be implemented)
3152
binlog_[wild_]{do|ignore}_table?" (WL#1049)"
3154
const char *local_db= event_info->get_db();
3155
if ((thd && !(thd->options & OPTION_BIN_LOG)) ||
3156
(!binlog_filter->db_ok(local_db)))
3158
VOID(pthread_mutex_unlock(&LOCK_log));
3163
Should we write to the binlog cache or to the binlog on disk?
3164
Write to the binlog cache if:
3165
- it is already not empty (meaning we're in a transaction; note that the
3166
present event could be about a non-transactional table, but still we need
3167
to write to the binlog cache in that case to handle updates to mixed
3168
trans/non-trans table types the best possible in binlogging)
3169
- or if the event asks for it (cache_stmt == TRUE).
3171
if (opt_using_transactions && thd)
3173
if (thd->binlog_setup_trx_data())
3176
binlog_trx_data *const trx_data=
3177
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
3178
IO_CACHE *trans_log= &trx_data->trans_log;
3179
my_off_t trans_log_pos= my_b_tell(trans_log);
3180
if (event_info->get_cache_stmt() || trans_log_pos != 0)
3182
if (trans_log_pos == 0)
3183
thd->binlog_start_trans_and_stmt();
3187
TODO as Mats suggested, for all the cases above where we write to
3188
trans_log, it sounds unnecessary to lock LOCK_log. We should rather
3189
test first if we want to write to trans_log, and if not, lock
3195
No check for auto events flag here - this write method should
3196
never be called if auto-events are enabled
3200
1. Write first log events which describe the 'run environment'
3205
If row-based binlogging, Insert_id, Rand and other kind of "setting
3206
context" events are not needed.
3210
if (!thd->current_stmt_binlog_row_based)
3212
if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
3214
Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
3215
thd->first_successful_insert_id_in_prev_stmt_for_binlog);
3219
if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
3222
If the auto_increment was second in a table's index (possible with
3223
MyISAM or BDB) (table->next_number_keypart != 0), such event is
3224
in fact not necessary. We could avoid logging it.
3226
Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
3227
thd->auto_inc_intervals_in_cur_stmt_for_binlog.
3234
Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
3238
if (thd->user_var_events.elements)
3240
for (uint i= 0; i < thd->user_var_events.elements; i++)
3242
BINLOG_USER_VAR_EVENT *user_var_event;
3243
get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);
3244
User_var_log_event e(thd, user_var_event->user_var_event->name.str,
3245
user_var_event->user_var_event->name.length,
3246
user_var_event->value,
3247
user_var_event->length,
3248
user_var_event->type,
3249
user_var_event->charset_number);
3258
Write the SQL command
3261
if (event_info->write(file))
3264
if (file == &log_file) // we are writing to the real log (disk)
3266
if (flush_and_sync())
3269
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3276
if (my_errno == EFBIG)
3277
my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(0));
3279
my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
3284
if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
3285
++m_table_map_version;
3287
pthread_mutex_unlock(&LOCK_log);
3292
int error_log_print(enum loglevel level, const char *format,
3295
return logger.error_log_print(level, format, args);
3299
bool slow_log_print(THD *thd, const char *query, uint query_length,
3300
uint64_t current_utime)
3302
return logger.slow_log_print(thd, query, query_length, current_utime);
3306
bool LOGGER::log_command(THD *thd, enum enum_server_command command)
3309
Log command if we have at least one log event handler enabled and want
3310
to log this kind of commands
3312
if (*general_log_handler_list && (what_to_log & (1L << (uint) command)))
3314
if (thd->options & OPTION_LOG_OFF)
3327
bool general_log_print(THD *thd, enum enum_server_command command,
3328
const char *format, ...)
3333
/* Print the message to the buffer if we want to log this kind of commands */
3334
if (! logger.log_command(thd, command))
3337
va_start(args, format);
3338
error= logger.general_log_print(thd, command, format, args);
3344
bool general_log_write(THD *thd, enum enum_server_command command,
3345
const char *query, uint query_length)
3347
/* Write the message to the log if we want to log this king of commands */
3348
if (logger.log_command(thd, command))
3349
return logger.general_log_write(thd, command, query, query_length);
3354
void MYSQL_BIN_LOG::rotate_and_purge(uint flags)
{
  /*
    Rotate the log when RP_FORCE_ROTATE is given or when the log has
    reached max_size. Right after a rotation, if replication support is
    compiled in and expire_logs_days is set, purge the logs that are
    older than that many days.

    LOCK_log is taken (and released again) here unless the caller passes
    RP_LOCK_LOG_IS_ALREADY_LOCKED.
  */
  const bool take_lock= !(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED);

  if (take_lock)
    pthread_mutex_lock(&LOCK_log);

  const bool rotate_now= (flags & RP_FORCE_ROTATE) ||
                         (my_b_tell(&log_file) >= (my_off_t) max_size);
  if (rotate_now)
  {
    new_file_without_locking();
#ifdef HAVE_REPLICATION
    if (expire_logs_days)
    {
      /* Everything older than this point in time may be purged. */
      const time_t cutoff= my_time(0) - expire_logs_days*24*60*60;
      if (cutoff >= 0)
        purge_logs_before_date(cutoff);
    }
#endif
  }

  if (take_lock)
    pthread_mutex_unlock(&LOCK_log);
}
3375
uint MYSQL_BIN_LOG::next_file_id()
3378
pthread_mutex_lock(&LOCK_log);
3380
pthread_mutex_unlock(&LOCK_log);
3386
Write the contents of a cache to the binary log.
3390
cache Cache to write to the binary log
3391
lock_log True if the LOCK_log mutex should be acquired, false otherwise
3392
sync_log True if the log should be flushed and synced
3395
Write the contents of the cache to the binary log. The cache will
3396
be reset as a READ_CACHE to be able to read the contents from it.
3399
int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
3401
Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
3403
if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
3404
return ER_ERROR_ON_WRITE;
3405
uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
3407
uchar header[LOG_EVENT_HEADER_LEN];
3410
The events in the buffer have incorrect end_log_pos data
3411
(relative to beginning of group rather than absolute),
3412
so we'll recalculate them in situ so the binlog is always
3413
correct, even in the middle of a group. This is possible
3414
because we now know the start position of the group (the
3415
offset of this cache in the log, if you will); all we need
3416
to do is to find all event-headers, and add the position of
3417
the group to the end_log_pos of each event. This is pretty
3418
straight forward, except that we read the cache in segments,
3419
so an event-header might end up on the cache-border and get
3423
group= (uint)my_b_tell(&log_file);
3430
if we only got a partial header in the last iteration,
3431
get the other half now and process a full header.
3433
if (unlikely(carry > 0))
3435
assert(carry < LOG_EVENT_HEADER_LEN);
3437
/* assemble both halves */
3438
memcpy(&header[carry], (char *)cache->read_pos, LOG_EVENT_HEADER_LEN - carry);
3440
/* fix end_log_pos */
3441
val= uint4korr(&header[LOG_POS_OFFSET]) + group;
3442
int4store(&header[LOG_POS_OFFSET], val);
3444
/* write the first half of the split header */
3445
if (my_b_write(&log_file, header, carry))
3446
return ER_ERROR_ON_WRITE;
3449
copy fixed second half of header to cache so the correct
3450
version will be written later.
3452
memcpy((char *)cache->read_pos, &header[carry], LOG_EVENT_HEADER_LEN - carry);
3454
/* next event header at ... */
3455
hdr_offs = uint4korr(&header[EVENT_LEN_OFFSET]) - carry;
3460
/* if there is anything to write, process it. */
3462
if (likely(length > 0))
3465
process all event-headers in this (partial) cache.
3466
if next header is beyond current read-buffer,
3467
we'll get it later (though not necessarily in the
3468
very next iteration, just "eventually").
3471
while (hdr_offs < length)
3474
partial header only? save what we can get, process once
3478
if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
3480
carry= length - hdr_offs;
3481
memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
3486
/* we've got a full event-header, and it came in one piece */
3488
uchar *log_pos= (uchar *)cache->read_pos + hdr_offs + LOG_POS_OFFSET;
3490
/* fix end_log_pos */
3491
val= uint4korr(log_pos) + group;
3492
int4store(log_pos, val);
3494
/* next event header at ... */
3495
log_pos= (uchar *)cache->read_pos + hdr_offs + EVENT_LEN_OFFSET;
3496
hdr_offs += uint4korr(log_pos);
3502
Adjust hdr_offs. Note that it may still point beyond the segment
3503
read in the next iteration; if the current event is very long,
3504
it may take a couple of read-iterations (and subsequent adjustments
3505
of hdr_offs) for it to point into the then-current segment.
3506
If we have a split header (!carry), hdr_offs will be set at the
3507
beginning of the next iteration, overwriting the value we set here:
3512
/* Write data to the binary log file */
3513
if (my_b_write(&log_file, cache->read_pos, length))
3514
return ER_ERROR_ON_WRITE;
3515
cache->read_pos=cache->read_end; // Mark buffer used up
3516
} while ((length= my_b_fill(cache)));
3527
Write a cached log entry to the binary log.
3528
- To support transaction over replication, we wrap the transaction
3529
with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
3530
We want to write a BEGIN/ROLLBACK block when a non-transactional table
3531
was updated in a transaction which was rolled back. This is to ensure
3532
that the same updates are run on the slave.
3535
@param cache The cache to copy to the binlog
3536
@param commit_event The commit event to print after writing the
3537
contents of the cache.
3540
We only come here if there is something in the cache.
3542
The thing in the cache is always a complete transaction.
3544
'cache' needs to be reinitialized after this functions returns.
3547
bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
3549
VOID(pthread_mutex_lock(&LOCK_log));
3551
/* NULL would represent nothing to replicate after ROLLBACK */
3552
assert(commit_event != NULL);
3555
if (likely(is_open())) // Should always be true
3558
We only bother to write to the binary log if there is anything
3561
if (my_b_tell(cache) > 0)
3564
Log "BEGIN" at the beginning of every transaction. Here, a
3565
transaction is either a BEGIN..COMMIT block or a single
3566
statement in autocommit mode.
3568
Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), true, false);
3570
Imagine this is rollback due to net timeout, after all
3571
statements of the transaction succeeded. Then we want a
3572
zero-error code in BEGIN. In other words, if there was a
3573
really serious error code it's already in the statement's
3574
events, there is no need to put it also in this internally
3575
generated event, and as this event is generated late it would
3576
lead to false alarms.
3578
This is safer than thd->clear_error() against kills at shutdown.
3580
qinfo.error_code= 0;
3582
Now this Query_log_event has artificial log_pos 0. It must be
3583
adjusted to reflect the real position in the log. Not doing it
3584
would confuse the slave: it would prevent this one from
3585
knowing where he is in the master's binlog, which would result
3586
in wrong positions being shown to the user, MASTER_POS_WAIT
3589
if (qinfo.write(&log_file))
3592
if ((write_error= write_cache(cache, false, false)))
3595
if (commit_event && commit_event->write(&log_file))
3597
if (flush_and_sync())
3599
if (cache->error) // Error on read
3601
sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
3602
write_error=1; // Don't give more errors
3609
if commit_event is Xid_log_event, increase the number of
3610
prepared_xids (it's decreased in ::unlog()). Binlog cannot be rotated
3611
if there're prepared xids in it - see the comment in new_file() for
3613
If the commit_event is not Xid_log_event (then it's a Query_log_event)
3614
rotate binlog, if necessary.
3616
if (commit_event && commit_event->get_type_code() == XID_EVENT)
3618
pthread_mutex_lock(&LOCK_prep_xids);
3620
pthread_mutex_unlock(&LOCK_prep_xids);
3623
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3625
VOID(pthread_mutex_unlock(&LOCK_log));
3633
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
3635
VOID(pthread_mutex_unlock(&LOCK_log));
3641
Wait until we get a signal that the relay log has been updated
3643
@param[in] thd a THD struct
3645
LOCK_log must be taken before calling this function.
3646
It will be released at the end of the function.
3649
void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
{
  /*
    Register the wait on the thread (update_cond / LOCK_log) together
    with the state message, block until update_cond is signalled, then
    restore the previous message through exit_cond().
    The caller must hold LOCK_log on entry.
  */
  const char *saved_msg= thd->enter_cond(&update_cond, &LOCK_log,
                                         "Slave has read all relay log; "
                                         "waiting for the slave I/O "
                                         "thread to update it" );

  pthread_cond_wait(&update_cond, &LOCK_log);

  thd->exit_cond(saved_msg);
}
3663
Wait until we get a signal that the binary log has been updated.
3664
Applies to master only.
3667
@param[in] thd a THD struct
3668
@param[in] timeout a pointer to a timespec;
3669
NULL means to wait w/o timeout.
3670
@retval 0 if got signalled on update
3671
@retval non-0 if wait timeout elapsed
3673
LOCK_log must be taken before calling this function.
3674
LOCK_log is being released while the thread is waiting.
3675
LOCK_log is released by the caller.
3678
int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
3679
const struct timespec *timeout)
3682
const char* old_msg = thd->proc_info;
3683
old_msg= thd->enter_cond(&update_cond, &LOCK_log,
3684
"Master has sent all binlog to slave; "
3685
"waiting for binlog to be updated");
3687
pthread_cond_wait(&update_cond, &LOCK_log);
3689
ret= pthread_cond_timedwait(&update_cond, &LOCK_log,
3690
const_cast<struct timespec *>(timeout));
3698
@param exiting Bitmask for one or more of the following bits:
3699
- LOG_CLOSE_INDEX : if we should close the index file
3700
- LOG_CLOSE_TO_BE_OPENED : if we intend to call open
3701
at once after close.
3702
- LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
3705
One can do an open on the object at once after doing a close.
3706
The internal structures are not freed until cleanup() is called
3709
void MYSQL_BIN_LOG::close(uint exiting)
3710
{ // One can't set log_type here!
3711
if (log_state == LOG_OPENED)
3713
#ifdef HAVE_REPLICATION
3714
if (log_type == LOG_BIN && !no_auto_events &&
3715
(exiting & LOG_CLOSE_STOP_EVENT))
3719
bytes_written+= s.data_written;
3722
#endif /* HAVE_REPLICATION */
3724
/* don't pwrite in a file opened with O_APPEND - it doesn't work */
3725
if (log_file.type == WRITE_CACHE && log_type == LOG_BIN)
3727
my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
3728
uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
3729
pwrite(log_file.file, &flags, 1, offset);
3732
/* this will cleanup IO_CACHE, sync and close the file */
3733
MYSQL_LOG::close(exiting);
3737
The following test is needed even if is_open() is not set, as we may have
3738
called a not complete close earlier and the index file is still open.
3741
if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
3743
end_io_cache(&index_file);
3744
if (my_close(index_file.file, MYF(0)) < 0 && ! write_error)
3747
sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name, errno);
3750
log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
3756
void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
3759
We need to take locks, otherwise this may happen:
3760
new_file() is called, calls open(old_max_size), then before open() starts,
3761
set_max_size() sets max_size to max_size_arg, then open() starts and
3762
uses the old_max_size argument, so max_size_arg has been overwritten and
3763
it's like if the SET command was never run.
3765
pthread_mutex_lock(&LOCK_log);
3767
max_size= max_size_arg;
3768
pthread_mutex_unlock(&LOCK_log);
3774
Check if a string is a valid number.
3776
@param str String to test
3777
@param res Store value here
3778
@param allow_wildcards Set to 1 if we should ignore '%' and '_'
3781
For the moment the allow_wildcards argument is not used
3782
Should be moved to some other file.
3785
1 String is a number
3790
static bool test_if_number(register const char *str,
3791
long *res, bool allow_wildcards)
3798
while (*str++ == ' ') ;
3799
if (*--str == '-' || *str == '+')
3801
while (my_isdigit(files_charset_info,*str) ||
3802
(allow_wildcards && (*str == wild_many || *str == wild_one)))
3810
my_isdigit(files_charset_info,*str) ||
3811
(allow_wildcards && (*str == wild_many || *str == wild_one)) ;
3814
if (*str != 0 || flag == 0)
3818
return(1); /* Number ok */
3819
} /* test_if_number */
3822
void sql_perror(const char *message)
3824
#ifdef HAVE_STRERROR
3825
sql_print_error("%s: %s",message, strerror(errno));
3832
bool flush_error_log()
3837
char err_renamed[FN_REFLEN], *end;
3838
end= strmake(err_renamed,log_error_file,FN_REFLEN-4);
3839
strmov(end, "-old");
3840
VOID(pthread_mutex_lock(&LOCK_error_log));
3841
char err_temp[FN_REFLEN+4];
3843
On Windows a temporary file is necessary to rename
3844
the current error file.
3846
strxmov(err_temp, err_renamed,"-tmp",NullS);
3847
(void) my_delete(err_temp, MYF(0));
3848
if (freopen(err_temp,"a+",stdout))
3854
freopen(err_temp,"a+",stderr);
3855
(void) my_delete(err_renamed, MYF(0));
3856
my_rename(log_error_file,err_renamed,MYF(0));
3857
if (freopen(log_error_file,"a+",stdout))
3858
freopen(log_error_file,"a+",stderr);
3860
if ((fd = my_open(err_temp, O_RDONLY, MYF(0))) >= 0)
3862
while ((bytes= my_read(fd, buf, IO_SIZE, MYF(0))) &&
3863
bytes != MY_FILE_ERROR)
3864
my_fwrite(stderr, buf, bytes, MYF(0));
3865
my_close(fd, MYF(0));
3867
(void) my_delete(err_temp, MYF(0));
3871
VOID(pthread_mutex_unlock(&LOCK_error_log));
3876
void MYSQL_BIN_LOG::signal_update()
3878
pthread_cond_broadcast(&update_cond);
3883
Prints a printf style message to the error log and, under NT, to the
3886
This function prints the message into a buffer and then sends that buffer
3887
to other functions to write that message to other logging sources.
3889
@param event_type Type of event to write (Error, Warning, or Info)
3890
@param format Printf style format of message
3891
@param args va_list list of arguments for the message
3894
The function always returns 0. The return value is present in the
3895
signature to be compatible with other logging routines, which could
3896
return an error (e.g. logging to the log tables)
3898
static void print_buffer_to_file(enum loglevel level,
3899
int error_code __attribute__((__unused__)),
3901
size_t buffer_length __attribute__((__unused__)))
3907
VOID(pthread_mutex_lock(&LOCK_error_log));
3910
localtime_r(&skr, &tm_tmp);
3913
fprintf(stderr, "%02d%02d%02d %2d:%02d:%02d [%s] %s\n",
3914
start->tm_year % 100,
3920
(level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
3921
"Warning" : "Note"),
3926
VOID(pthread_mutex_unlock(&LOCK_error_log));
3931
int vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
3935
int error_code= errno;
3937
length= vsnprintf(buff, sizeof(buff), format, args);
3939
print_buffer_to_file(level, error_code, buff, length);
3945
void sql_print_error(const char *format, ...)
3949
va_start(args, format);
3950
error_log_print(ERROR_LEVEL, format, args);
3957
void sql_print_warning(const char *format, ...)
3961
va_start(args, format);
3962
error_log_print(WARNING_LEVEL, format, args);
3969
void sql_print_information(const char *format, ...)
3973
va_start(args, format);
3974
error_log_print(INFORMATION_LEVEL, format, args);
3981
/********* transaction coordinator log for 2pc - mmap() based solution *******/
3984
the log consists of a file, mmapped to a memory.
3985
file is divided on pages of tc_log_page_size size.
3986
(usable size of the first page is smaller because of log header)
3987
there's PAGE control structure for each page
3988
each page (or rather PAGE control structure) can be in one of three
3989
states - active, syncing, pool.
3990
there could be only one page in active or syncing states,
3991
but many in pool - pool is fifo queue.
3992
usual lifecycle of a page is pool->active->syncing->pool
3993
"active" page - is a page where new xid's are logged.
3994
the page stays active as long as syncing slot is taken.
3995
"syncing" page is being synced to disk. no new xid can be added to it.
3996
when the sync is done the page is moved to a pool and an active page
3999
the result of such an architecture is a natural "commit grouping" -
4000
If commits are coming faster than the system can sync, they do not
4001
stall. Instead, all commit that came since the last sync are
4002
logged to the same page, and they all are synced with the next -
4003
one - sync. Thus, though individual commits are delayed, throughput
4006
when a xid is added to an active page, the thread of this xid waits
4007
for a page's condition until the page is synced. when syncing slot
4008
becomes vacant one of these waiters is awakened to take care of syncing.
4009
it syncs the page and signals all waiters that the page is synced.
4010
PAGE::waiters is used to count these waiters, and a page may never
4011
become active again until waiters==0 (that is all waiters from the
4012
previous sync have noticed the sync was completed)
4014
note, that the page becomes "dirty" and has to be synced only when a
4015
new xid is added into it. Removing a xid from a page does not make it
4016
dirty - we don't sync removals to disk.
4019
ulong tc_log_page_waits= 0;
4023
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)
4025
static const char tc_log_magic[]={(char) 254, 0x23, 0x05, 0x74};
4027
ulong opt_tc_log_size= TC_LOG_MIN_SIZE;
4028
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
4030
int TC_LOG_MMAP::open(const char *opt_name)
4033
bool crashed= false;
4036
assert(total_ha_2pc > 1);
4037
assert(opt_name && opt_name[0]);
4039
tc_log_page_size= my_getpagesize();
4040
assert(TC_LOG_PAGE_SIZE % tc_log_page_size == 0);
4042
fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
4043
if ((fd= my_open(logname, O_RDWR, MYF(0))) < 0)
4045
if (my_errno != ENOENT)
4047
if (using_heuristic_recover())
4049
if ((fd= my_create(logname, CREATE_MODE, O_RDWR, MYF(MY_WME))) < 0)
4052
file_length= opt_tc_log_size;
4053
if (ftruncate(fd, file_length))
4060
sql_print_information("Recovering after a crash using %s", opt_name);
4061
if (tc_heuristic_recover)
4063
sql_print_error("Cannot perform automatic crash recovery when "
4064
"--tc-heuristic-recover is used");
4067
file_length= my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
4068
if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
4072
data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
4073
MAP_NOSYNC|MAP_SHARED, fd, 0);
4074
if (data == MAP_FAILED)
4081
npages=(uint)file_length/tc_log_page_size;
4082
assert(npages >= 3); // to guarantee non-empty pool
4083
if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
4086
for (pg=pages, i=0; i < npages; i++, pg++)
4091
pthread_mutex_init(&pg->lock, MY_MUTEX_INIT_FAST);
4092
pthread_cond_init (&pg->cond, 0);
4093
pg->start=(my_xid *)(data + i*tc_log_page_size);
4095
// pg->start is a my_xid*, while tc_log_page_size is a byte count: do the
// addition on a byte pointer. Adding it to pg->start directly scales the
// offset by sizeof(my_xid) and puts pg->end far beyond this page.
pg->end=(my_xid *)((uchar *)pg->start + tc_log_page_size);
4096
pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
4098
pages[0].size=pages[0].free=
4099
(tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
4100
pages[0].start=pages[0].end-pages[0].size;
4101
pages[npages-1].next=0;
4104
if (crashed && recover())
4107
memcpy(data, tc_log_magic, sizeof(tc_log_magic));
4108
data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
4109
msync(data, tc_log_page_size, MS_SYNC);
4110
my_sync(fd, MYF(0));
4113
pthread_mutex_init(&LOCK_sync, MY_MUTEX_INIT_FAST);
4114
pthread_mutex_init(&LOCK_active, MY_MUTEX_INIT_FAST);
4115
pthread_mutex_init(&LOCK_pool, MY_MUTEX_INIT_FAST);
4116
pthread_cond_init(&COND_active, 0);
4117
pthread_cond_init(&COND_pool, 0);
4124
pool_last=pages+npages-1;
4134
there is no active page, let's get one from the pool.
4136
Two strategies here:
4137
-# take the first from the pool
4138
-# if there're waiters - take the one with the most free space.
4141
TODO page merging. try to allocate adjacent page first,
4142
so that they can be flushed both in one sync
4145
void TC_LOG_MMAP::get_active_from_pool()
4147
PAGE **p, **best_p=0;
4151
pthread_mutex_lock(&LOCK_pool);
4156
if ((*p)->waiters == 0) // can the first page be used ?
4157
break; // yes - take it.
4159
best_free=0; // no - trying second strategy
4160
for (p=&(*p)->next; *p; p=&(*p)->next)
4162
if ((*p)->waiters == 0 && (*p)->free > best_free)
4164
best_free=(*p)->free;
4169
while ((*best_p == 0 || best_free == 0) && overflow());
4172
if (active->free == active->size) // we've chosen an empty page
4174
tc_log_cur_pages_used++;
4175
set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
4178
if ((*best_p)->next) // unlink the page from the pool
4179
*best_p=(*best_p)->next;
4184
pthread_mutex_unlock(&LOCK_pool);
4189
perhaps, increase log size ?
4191
// Overflow handler: block until COND_pool is signalled.
// Increments the tc_log_page_waits counter, then waits on COND_pool.
// The caller must already hold LOCK_pool (pthread_cond_wait() releases it
// while waiting and re-acquires it before returning).
// Returns 1 unconditionally.
int TC_LOG_MMAP::overflow()
4194
simple overflow handling - just wait
4195
TODO perhaps, increase log size ?
4196
let's check the behaviour of tc_log_page_waits first
4198
tc_log_page_waits++;
4199
pthread_cond_wait(&COND_pool, &LOCK_pool);
4200
return 1; // always return 1
4204
Record that transaction XID is committed on the persistent storage.
4206
This function is called in the middle of two-phase commit:
4207
First all resources prepare the transaction, then tc_log->log() is called,
4208
then all resources commit the transaction, then tc_log->unlog() is called.
4210
All access to active page is serialized but it's not a problem, as
4211
we're assuming that fsync() will be a main bottleneck.
4212
That is, parallelizing writes to log pages we'll decrease number of
4213
threads waiting for a page, but then all these threads will be waiting
4214
for a fsync() anyway
4216
If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and
4217
records XID in a special Xid_log_event.
4218
If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped
4224
\# - otherwise, "cookie", a number that will be passed as an argument
4225
to unlog() call. tc_log can define it any way it wants,
4226
and use for whatever purposes. TC_LOG_MMAP sets it
4227
to the position in memory where xid was logged to.
4230
int TC_LOG_MMAP::log_xid(THD *thd __attribute__((__unused__)), my_xid xid)
4236
pthread_mutex_lock(&LOCK_active);
4239
if active page is full - just wait...
4240
frankly speaking, active->free here is accessed outside of mutex
4241
protection, but it's safe, because it only means we may miss an
4242
unlog() for the active page, and we're not waiting for it here -
4243
unlog() does not signal COND_active.
4245
while (unlikely(active && active->free == 0))
4246
pthread_cond_wait(&COND_active, &LOCK_active);
4248
/* no active page ? take one from the pool */
4250
get_active_from_pool();
4253
pthread_mutex_lock(&p->lock);
4255
/* searching for an empty slot */
4259
assert(p->ptr < p->end); // because p->free > 0
4262
/* found! store xid there and mark the page dirty */
4263
cookie= (ulong)((uchar *)p->ptr - data); // can never be zero
4268
/* to sync or not to sync - this is the question */
4269
pthread_mutex_unlock(&LOCK_active);
4270
pthread_mutex_lock(&LOCK_sync);
4271
pthread_mutex_unlock(&p->lock);
4274
{ // somebody's syncing. let's wait
4277
note - it must be while (), not do ... while () here
4278
as p->state may be not DIRTY when we come here
4280
while (p->state == DIRTY && syncing)
4281
pthread_cond_wait(&p->cond, &LOCK_sync);
4283
err= p->state == ERROR;
4284
if (p->state != DIRTY) // page was synced
4286
if (p->waiters == 0)
4287
pthread_cond_signal(&COND_pool); // in case somebody's waiting
4288
pthread_mutex_unlock(&LOCK_sync);
4289
goto done; // we're done
4291
} // page was not synced! do it now
4292
assert(active == p && syncing == 0);
4293
pthread_mutex_lock(&LOCK_active);
4294
syncing=p; // place is vacant - take it
4295
active=0; // page is not active anymore
4296
pthread_cond_broadcast(&COND_active); // in case somebody's waiting
4297
pthread_mutex_unlock(&LOCK_active);
4298
pthread_mutex_unlock(&LOCK_sync);
4302
return err ? 0 : cookie;
4305
int TC_LOG_MMAP::sync()
4309
assert(syncing != active);
4312
sit down and relax - this can take a while...
4313
note - no locks are held at this point
4315
err= msync(syncing->start, 1, MS_SYNC);
4317
err= my_sync(fd, MYF(0));
4319
/* page is synced. let's move it to the pool */
4320
pthread_mutex_lock(&LOCK_pool);
4321
pool_last->next=syncing;
4324
syncing->state= err ? ERROR : POOL;
4325
pthread_cond_broadcast(&syncing->cond); // signal "sync done"
4326
pthread_cond_signal(&COND_pool); // in case somebody's waiting
4327
pthread_mutex_unlock(&LOCK_pool);
4329
/* marking 'syncing' slot free */
4330
pthread_mutex_lock(&LOCK_sync);
4332
pthread_cond_signal(&active->cond); // wake up a new syncer
4333
pthread_mutex_unlock(&LOCK_sync);
4338
erase xid from the page, update page free space counters/pointers.
4339
cookie points directly to the memory where xid was logged.
4342
void TC_LOG_MMAP::unlog(ulong cookie, my_xid xid __attribute__((__unused__)))
4344
PAGE *p=pages+(cookie/tc_log_page_size);
4345
my_xid *x=(my_xid *)(data+cookie);
4348
assert(x >= p->start && x < p->end);
4351
pthread_mutex_lock(&p->lock);
4353
assert(p->free <= p->size);
4354
set_if_smaller(p->ptr, x);
4355
if (p->free == p->size) // the page is completely empty
4356
statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
4357
if (p->waiters == 0) // the page is in pool and ready to rock
4358
pthread_cond_signal(&COND_pool); // ping ... for overflow()
4359
pthread_mutex_unlock(&p->lock);
4362
void TC_LOG_MMAP::close()
4367
pthread_mutex_destroy(&LOCK_sync);
4368
pthread_mutex_destroy(&LOCK_active);
4369
pthread_mutex_destroy(&LOCK_pool);
4370
pthread_cond_destroy(&COND_pool);
4372
data[0]='A'; // garble the first (signature) byte, in case my_delete fails
4374
for (i=0; i < npages; i++)
4376
if (pages[i].ptr == 0)
4378
pthread_mutex_destroy(&pages[i].lock);
4379
pthread_cond_destroy(&pages[i].cond);
4382
my_free((uchar*)pages, MYF(0));
4384
my_munmap((char*)data, (size_t)file_length);
4386
my_close(fd, MYF(0));
4388
if (inited>=5) // cannot do in the switch because of Windows
4389
my_delete(logname, MYF(MY_WME));
4393
int TC_LOG_MMAP::recover()
4396
PAGE *p=pages, *end_p=pages+npages;
4398
if (memcmp(data, tc_log_magic, sizeof(tc_log_magic)))
4400
sql_print_error("Bad magic header in tc log");
4405
the first byte after magic signature is set to current
4406
number of storage engines on startup
4408
if (data[sizeof(tc_log_magic)] != total_ha_2pc)
4410
sql_print_error("Recovery failed! You must enable "
4411
"exactly %d storage engines that support "
4412
"two-phase commit protocol",
4413
data[sizeof(tc_log_magic)]);
4417
if (hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
4418
sizeof(my_xid), 0, 0, MYF(0)))
4421
for ( ; p < end_p ; p++)
4423
for (my_xid *x=p->start; x < p->end; x++)
4424
if (*x && my_hash_insert(&xids, (uchar *)x))
4428
if (ha_recover(&xids))
4432
bzero(data, (size_t)file_length);
4438
sql_print_error("Crash recovery failed. Either correct the problem "
4439
"(if it's, for example, out of memory error) and restart, "
4440
"or delete tc log and start mysqld with "
4441
"--tc-heuristic-recover={commit|rollback}");
4447
TC_LOG_DUMMY tc_log_dummy;
4448
TC_LOG_MMAP tc_log_mmap;
4451
Perform heuristic recovery, if --tc-heuristic-recover was used.
4454
no matter whether heuristic recovery was successful or not
4455
mysqld must exit. So, return value is the same in both cases.
4458
0 no heuristic recovery was requested
4460
1 heuristic recovery was performed
4463
int TC_LOG::using_heuristic_recover()
4465
if (!tc_heuristic_recover)
4468
sql_print_information("Heuristic crash recovery mode");
4470
sql_print_error("Heuristic crash recovery failed");
4471
sql_print_information("Please restart mysqld without --tc-heuristic-recover");
4475
/****** transaction coordinator log for 2pc - binlog() based solution ******/
4476
#define TC_LOG_BINLOG MYSQL_BIN_LOG
4480
keep in-memory list of prepared transactions
4481
(add to list in log(), remove on unlog())
4482
and copy it to the new binlog if rotated
4483
but let's check the behaviour of tc_log_page_waits first!
4486
int TC_LOG_BINLOG::open(const char *opt_name)
4491
assert(total_ha_2pc > 1);
4492
assert(opt_name && opt_name[0]);
4494
pthread_mutex_init(&LOCK_prep_xids, MY_MUTEX_INIT_FAST);
4495
pthread_cond_init (&COND_prep_xids, 0);
4497
if (!my_b_inited(&index_file))
4499
/* There was a failure to open the index file, can't open the binlog */
4504
if (using_heuristic_recover())
4506
/* generate a new binlog to mask a corrupted one */
4507
open(opt_name, LOG_BIN, 0, WRITE_CACHE, 0, max_binlog_size, 0);
4512
if ((error= find_log_pos(&log_info, NullS, 1)))
4514
if (error != LOG_INFO_EOF)
4515
sql_print_error("find_log_pos() failed (error: %d)", error);
4526
Format_description_log_event fdle(BINLOG_VERSION);
4527
char log_name[FN_REFLEN];
4529
if (! fdle.is_valid())
4534
strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
4535
} while (!(error= find_next_log(&log_info, 1)));
4537
if (error != LOG_INFO_EOF)
4539
sql_print_error("find_log_pos() failed (error: %d)", error);
4543
if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
4545
sql_print_error("%s", errmsg);
4549
if ((ev= Log_event::read_log_event(&log, 0, &fdle)) &&
4550
ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
4551
ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
4553
sql_print_information("Recovering after a crash using %s", opt_name);
4554
error= recover(&log, (Format_description_log_event *)ev);
4561
my_close(file, MYF(MY_WME));
4571
/** This is called on shutdown, after ha_panic. */
4572
// Tear down the prepared-XID synchronisation primitives.
// Asserts that prepared_xids is zero, then destroys LOCK_prep_xids and
// COND_prep_xids.
void TC_LOG_BINLOG::close()
4574
assert(prepared_xids==0);
4575
pthread_mutex_destroy(&LOCK_prep_xids);
4576
pthread_cond_destroy (&COND_prep_xids);
4588
// Log the commit of transaction `xid` by ending the binlog transaction
// with an Xid_log_event.
// @param thd  connection whose binlog transaction data is looked up via
//             thd_get_ha_data(thd, binlog_hton)
// @param xid  transaction id placed into the Xid_log_event
// @return the logical negation of binlog_end_trans()'s result
int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
4590
Xid_log_event xle(thd, xid);
4591
binlog_trx_data *trx_data=
4592
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
4594
We always commit the entire transaction when writing an XID. Also
4595
note that the return value is inverted.
4597
return(!binlog_end_trans(thd, trx_data, &xle, true));
4600
// Note that one prepared transaction has finished.
// Both parameters are unused. Under LOCK_prep_xids the prepared_xids
// counter is decremented, and COND_prep_xids is signalled when it reaches
// zero. NOTE(review): the waiter on COND_prep_xids is not visible here;
// presumably log rotation waits for the counter to drain - confirm.
// rotate_and_purge(0) is called after the mutex has been released.
void TC_LOG_BINLOG::unlog(ulong cookie __attribute__((__unused__)),
4601
my_xid xid __attribute__((__unused__)))
4603
pthread_mutex_lock(&LOCK_prep_xids);
4604
assert(prepared_xids > 0);
4605
if (--prepared_xids == 0) {
4606
pthread_cond_signal(&COND_prep_xids);
4608
pthread_mutex_unlock(&LOCK_prep_xids);
4609
rotate_and_purge(0); // as ::write() did not rotate
4612
int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
4618
if (! fdle->is_valid() ||
4619
hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
4620
sizeof(my_xid), 0, 0, MYF(0)))
4623
init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE);
4625
fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
4627
while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid())
4629
if (ev->get_type_code() == XID_EVENT)
4631
Xid_log_event *xev=(Xid_log_event *)ev;
4632
uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
4636
my_hash_insert(&xids, x);
4641
if (ha_recover(&xids))
4644
free_root(&mem_root, MYF(0));
4649
free_root(&mem_root, MYF(0));
4652
sql_print_error("Crash recovery failed. Either correct the problem "
4653
"(if it's, for example, out of memory error) and restart, "
4654
"or delete (or rename) binary log and start mysqld with "
4655
"--tc-heuristic-recover={commit|rollback}");
4660
// Free-function accessors over the global mysql_bin_log object; compiled
// only when INNODB_COMPATIBILITY_HOOKS is defined.
#ifdef INNODB_COMPATIBILITY_HOOKS
4662
Get the file name of the MySQL binlog.
4663
@return the name of the binlog file
4666
const char* mysql_bin_log_file_name(void)
4668
return mysql_bin_log.get_log_fname();
4671
Get the current position of the MySQL binlog.
4672
@return byte offset from the beginning of the binlog
4675
uint64_t mysql_bin_log_file_pos(void)
4677
// Reads the pos_in_file member of the object returned by get_log_file().
// NOTE(review): if pos_in_file excludes bytes still buffered in memory,
// this may lag the logical end of the log - confirm against the cache type.
return (uint64_t) mysql_bin_log.get_log_file()->pos_in_file;
4679
#endif /* INNODB_COMPATIBILITY_HOOKS */
4682
mysql_declare_plugin(binlog)
4684
MYSQL_STORAGE_ENGINE_PLUGIN,
4688
"This is a pseudo storage engine to represent the binlog in a transaction",
4690
binlog_init, /* Plugin Init */
4691
NULL, /* Plugin Deinit */
4692
NULL, /* status variables */
4693
NULL, /* system variables */
4694
NULL /* config options */
4696
mysql_declare_plugin_end;