1
/* Copyright (C) 2000-2003 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
24
Abort logging when we get an error in reading or writing log files
27
#include <drizzled/server_includes.h>
29
#include "rpl_filter.h"
32
#include <mysys/my_dir.h>
35
#include <drizzled/plugin.h>
36
#include <drizzled/drizzled_error_messages.h>
38
/* max size of the log message */
39
#define MAX_LOG_BUFFER_SIZE 1024
40
/* size of the stack buffer that receives the formatted user/host string */
#define MAX_USER_HOST_SIZE 512
41
/* size of the buffer used to format the timestamp of a query log entry */
#define MAX_TIME_SIZE 32
42
/*
  my_off_t value with every bit set. binlog_trx_data stores it in
  before_stmt_pos when no statement start position is recorded.
*/
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
44
/*
  Yield the spelling of flag F followed by one space when V has any bit
  of F set, and the empty string otherwise. F is stringified as written
  at the call site (it is not macro-expanded first).
*/
#define FLAGSTR(V,F) ((((V) & (F)) != 0) ? #F " " : "")
48
DRIZZLE_BIN_LOG mysql_bin_log;
49
ulong sync_binlog_counter= 0;
51
static bool test_if_number(const char *str,
52
long *res, bool allow_wildcards);
53
static int binlog_init(void *p);
54
static int binlog_close_connection(handlerton *hton, THD *thd);
55
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
56
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
57
static int binlog_commit(handlerton *hton, THD *thd, bool all);
58
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
59
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
62
sql_print_message_func sql_print_message_handlers[3] =
64
sql_print_information,
70
char *make_default_log_name(char *buff,const char* log_ext)
72
strmake(buff, pidfile_name, FN_REFLEN-5);
73
return fn_format(buff, buff, mysql_data_home, log_ext,
74
MYF(MY_UNPACK_FILENAME|MY_REPLACE_EXT));
78
Helper class to hold a mutex for the duration of the
81
Eliminates the need for explicit unlocking of mutexes on, e.g.,
82
error returns. On passing a null pointer, the sentry will not do
88
Mutex_sentry(pthread_mutex_t *mutex)
92
pthread_mutex_lock(mutex);
98
pthread_mutex_unlock(m_mutex);
103
pthread_mutex_t *m_mutex;
105
// It's not allowed to copy this object in any way
106
Mutex_sentry(Mutex_sentry const&);
107
void operator=(Mutex_sentry const&);
111
Helper class to store binary log transaction data.
113
class binlog_trx_data {
116
: at_least_one_stmt(0), m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF)
118
trans_log.end_of_file= max_binlog_cache_size;
123
assert(pending() == NULL);
124
close_cached_file(&trans_log);
127
my_off_t position() const {
128
return my_b_tell(&trans_log);
133
return pending() == NULL && my_b_tell(&trans_log) == 0;
137
Truncate the transaction cache to a certain position. This
138
includes deleting the pending event.
140
void truncate(my_off_t pos)
144
reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0);
145
if (pos < before_stmt_pos)
146
before_stmt_pos= MY_OFF_T_UNDEF;
149
The only valid positions that can be truncated to are at the
150
beginning of a statement. We are relying on this fact to be able
151
to set the at_least_one_stmt flag correctly. In other words, if
152
we are truncating to the beginning of the transaction cache,
153
there will be no statements in the cache, otherwise, we will
154
have at least one statement in the transaction cache.
156
at_least_one_stmt= (pos > 0);
160
Reset the entire contents of the transaction cache, emptying it
166
before_stmt_pos= MY_OFF_T_UNDEF;
167
trans_log.end_of_file= max_binlog_cache_size;
170
Rows_log_event *pending() const
175
void set_pending(Rows_log_event *const pending)
180
IO_CACHE trans_log; // The transaction cache
183
Boolean that is true if there is at least one statement in the
186
bool at_least_one_stmt;
190
Pending binrows event. This event is the event where the rows are
193
Rows_log_event *m_pending;
197
Binlog position before the start of the current statement.
199
my_off_t before_stmt_pos;
202
/*
  The binlog handlerton. binlog_init() stores the pointer it is given
  here, and it is the key passed to thd_get_ha_data()/thd_set_ha_data()
  to reach a thread's binlog_trx_data.
*/
handlerton *binlog_hton;
205
/* Check if a given table is opened log table */
206
int check_if_log_table(uint db_len __attribute__((unused)),
207
const char *db __attribute__((unused)),
208
uint table_name_len __attribute__((unused)),
209
const char *table_name __attribute__((unused)),
210
uint check_if_opened __attribute__((unused)))
215
/* log event handlers */
217
bool Log_to_file_event_handler::
218
log_error(enum loglevel level, const char *format,
221
return vprint_msg_to_log(level, format, args);
224
void Log_to_file_event_handler::init_pthread_objects()
226
mysql_log.init_pthread_objects();
227
mysql_slow_log.init_pthread_objects();
231
/** Wrapper around DRIZZLE_LOG::write() for slow log. */
233
bool Log_to_file_event_handler::
234
log_slow(THD *thd, time_t current_time, time_t query_start_arg,
235
const char *user_host, uint user_host_len,
236
uint64_t query_utime, uint64_t lock_utime, bool is_command,
237
const char *sql_text, uint sql_text_len)
239
return mysql_slow_log.write(thd, current_time, query_start_arg,
240
user_host, user_host_len,
241
query_utime, lock_utime, is_command,
242
sql_text, sql_text_len);
247
Wrapper around DRIZZLE_LOG::write() for general log. We need it since we
248
want all log event handlers to have the same signature.
251
bool Log_to_file_event_handler::
252
log_general(THD *thd __attribute__((unused)),
253
time_t event_time, const char *user_host,
254
uint user_host_len, int thread_id,
255
const char *command_type, uint command_type_len,
256
const char *sql_text, uint sql_text_len,
257
const CHARSET_INFO * const client_cs __attribute__((unused)))
259
return mysql_log.write(event_time, user_host, user_host_len,
260
thread_id, command_type, command_type_len,
261
sql_text, sql_text_len);
265
bool Log_to_file_event_handler::init()
270
mysql_slow_log.open_slow_log(sys_var_slow_log_path.value);
273
mysql_log.open_query_log(sys_var_general_log_path.value);
275
is_initialized= true;
282
void Log_to_file_event_handler::cleanup()
285
mysql_slow_log.cleanup();
288
void Log_to_file_event_handler::flush()
290
/* reopen log files */
292
mysql_log.reopen_file();
294
mysql_slow_log.reopen_file();
298
Log error with all enabled log event handlers
303
level The level of the error significance: NOTE,
305
format format string for the error message
306
args list of arguments for the format string
313
bool LOGGER::error_log_print(enum loglevel level, const char *format,
317
Log_event_handler **current_handler;
319
/* currently we don't need locking here as there is no error_log table */
320
for (current_handler= error_log_handler_list ; *current_handler ;)
321
error= (*current_handler++)->log_error(level, format, args) || error;
327
void LOGGER::cleanup_base()
330
rwlock_destroy(&LOCK_logger);
331
if (file_log_handler)
332
file_log_handler->cleanup();
336
void LOGGER::cleanup_end()
339
if (file_log_handler)
340
delete file_log_handler;
345
Perform basic log initialization: create file-based log handler and
348
void LOGGER::init_base()
354
Here we create file log handler. We don't do it for the table log handler
355
here as it cannot be created so early. The reason is THD initialization,
356
which depends on the system variables (parsed later).
358
if (!file_log_handler)
359
file_log_handler= new Log_to_file_event_handler;
361
/* by default we use traditional error log */
362
init_error_log(LOG_FILE);
364
file_log_handler->init_pthread_objects();
365
my_rwlock_init(&LOCK_logger, NULL);
369
bool LOGGER::flush_logs(THD *thd __attribute__((unused)))
374
Now we lock logger, as nobody should be able to use logging routines while
375
log tables are closed
377
logger.lock_exclusive();
379
/* reopen log files */
380
file_log_handler->flush();
382
/* end of log flush */
389
Log slow query with all enabled log event handlers
394
thd THD of the query being logged
395
query The query being logged
396
query_length The length of the query string
397
current_utime Current time in microseconds (from undefined start)
404
bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
405
uint64_t current_utime)
409
Log_event_handler **current_handler;
410
bool is_command= false;
411
char user_host_buff[MAX_USER_HOST_SIZE];
412
Security_context *sctx= thd->security_ctx;
413
uint user_host_len= 0;
414
uint64_t query_utime, lock_utime;
417
Print the message to the buffer if we have slow log enabled
420
if (*slow_log_handler_list)
424
/* do not log slow queries from replication threads */
425
if (thd->slave_thread && !opt_log_slow_slave_statements)
435
/* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
436
user_host_len= (strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
437
sctx->user, "[", sctx->user, "] @ ",
439
sctx->ip, "]", NullS) -
442
current_time= my_time_possible_from_micro(current_utime);
443
if (thd->start_utime)
445
query_utime= (current_utime - thd->start_utime);
446
lock_utime= (thd->utime_after_lock - thd->start_utime);
450
query_utime= lock_utime= 0;
456
query= command_name[thd->command].str;
457
query_length= command_name[thd->command].length;
460
for (current_handler= slow_log_handler_list; *current_handler ;)
461
error= (*current_handler++)->log_slow(thd, current_time, thd->start_time,
462
user_host_buff, user_host_len,
463
query_utime, lock_utime, is_command,
464
query, query_length) || error;
471
bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
472
const char *query, uint query_length)
475
Log_event_handler **current_handler= general_log_handler_list;
476
char user_host_buff[MAX_USER_HOST_SIZE];
477
Security_context *sctx= thd->security_ctx;
479
uint user_host_len= 0;
483
id= thd->thread_id; /* Normal thread */
485
id= 0; /* Log from connect handler */
493
user_host_len= strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
497
sctx->ip, "]", NullS) -
500
current_time= my_time(0);
502
while (*current_handler)
503
error|= (*current_handler++)->
504
log_general(thd, current_time, user_host_buff,
506
command_name[(uint) command].str,
507
command_name[(uint) command].length,
509
thd->variables.character_set_client) || error;
515
bool LOGGER::general_log_print(THD *thd, enum enum_server_command command,
516
const char *format, va_list args)
518
uint message_buff_len= 0;
519
char message_buff[MAX_LOG_BUFFER_SIZE];
521
/* prepare message */
523
message_buff_len= vsnprintf(message_buff, sizeof(message_buff),
526
message_buff[0]= '\0';
528
return general_log_write(thd, command, message_buff, message_buff_len);
531
void LOGGER::init_error_log(uint error_log_printer)
533
if (error_log_printer & LOG_NONE)
535
error_log_handler_list[0]= 0;
539
switch (error_log_printer) {
541
error_log_handler_list[0]= file_log_handler;
542
error_log_handler_list[1]= 0;
547
void LOGGER::init_slow_log(uint slow_log_printer)
549
if (slow_log_printer & LOG_NONE)
551
slow_log_handler_list[0]= 0;
555
slow_log_handler_list[0]= file_log_handler;
556
slow_log_handler_list[1]= 0;
559
void LOGGER::init_general_log(uint general_log_printer)
561
if (general_log_printer & LOG_NONE)
563
general_log_handler_list[0]= 0;
567
general_log_handler_list[0]= file_log_handler;
568
general_log_handler_list[1]= 0;
572
bool LOGGER::activate_log_handler(THD* thd __attribute__((unused)),
575
DRIZZLE_QUERY_LOG *file_log;
582
file_log= file_log_handler->get_mysql_slow_log();
584
file_log->open_slow_log(sys_var_slow_log_path.value);
585
init_slow_log(log_output_options);
589
case QUERY_LOG_GENERAL:
592
file_log= file_log_handler->get_mysql_log();
594
file_log->open_query_log(sys_var_general_log_path.value);
595
init_general_log(log_output_options);
607
void LOGGER::deactivate_log_handler(THD *thd __attribute__((unused)),
611
DRIZZLE_LOG *file_log;
615
tmp_opt= &opt_slow_log;
616
file_log= file_log_handler->get_mysql_slow_log();
618
case QUERY_LOG_GENERAL:
620
file_log= file_log_handler->get_mysql_log();
623
assert(0); // Impossible
635
int LOGGER::set_handlers(uint error_log_printer,
636
uint slow_log_printer,
637
uint general_log_printer)
639
/* error log table is not supported yet */
642
init_error_log(error_log_printer);
643
init_slow_log(slow_log_printer);
644
init_general_log(general_log_printer);
653
Save position of binary log transaction cache.
656
binlog_trans_log_savepos()
658
thd The thread to take the binlog data from
659
pos Pointer to variable where the position will be stored
663
Save the current position in the binary log transaction cache into
664
the variable pointed to by 'pos'
668
binlog_trans_log_savepos(THD *thd, my_off_t *pos)
671
if (thd_get_ha_data(thd, binlog_hton) == NULL)
672
thd->binlog_setup_trx_data();
673
binlog_trx_data *const trx_data=
674
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
675
assert(mysql_bin_log.is_open());
676
*pos= trx_data->position();
682
Truncate the binary log transaction cache.
685
binlog_trans_log_truncate()
687
thd The thread to take the binlog data from
688
pos Position to truncate to
692
Truncate the binary log to the given position. Will not change
697
binlog_trans_log_truncate(THD *thd, my_off_t pos)
699
assert(thd_get_ha_data(thd, binlog_hton) != NULL);
700
/* Only true if binlog_trans_log_savepos() wasn't called before */
701
assert(pos != ~(my_off_t) 0);
703
binlog_trx_data *const trx_data=
704
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
705
trx_data->truncate(pos);
711
this function is mostly a placeholder.
712
conceptually, binlog initialization (now mostly done in DRIZZLE_BIN_LOG::open)
713
should be moved here.
716
int binlog_init(void *p)
718
binlog_hton= (handlerton *)p;
719
binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
720
binlog_hton->db_type=DB_TYPE_BINLOG;
721
binlog_hton->savepoint_offset= sizeof(my_off_t);
722
binlog_hton->close_connection= binlog_close_connection;
723
binlog_hton->savepoint_set= binlog_savepoint_set;
724
binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
725
binlog_hton->commit= binlog_commit;
726
binlog_hton->rollback= binlog_rollback;
727
binlog_hton->prepare= binlog_prepare;
728
binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
733
static int binlog_close_connection(handlerton *hton __attribute__((unused)),
736
binlog_trx_data *const trx_data=
737
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
738
assert(trx_data->empty());
739
thd_set_ha_data(thd, binlog_hton, NULL);
740
trx_data->~binlog_trx_data();
741
my_free((uchar*)trx_data, MYF(0));
751
thd The thread whose transaction should be ended
752
trx_data Pointer to the transaction data to use
753
end_ev The end event to use, or NULL
754
all True if the entire transaction should be ended, false if
755
only the statement transaction should be ended.
759
End the currently open transaction. The transaction can be either
760
a real transaction (if 'all' is true) or a statement transaction
763
If 'end_ev' is NULL, the transaction is a rollback of only
764
transactional tables, so the transaction cache will be truncated
765
to either just before the last opened statement transaction (if
766
'all' is false), or reset completely (if 'all' is true).
769
binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
770
Log_event *end_ev, bool all)
773
IO_CACHE *trans_log= &trx_data->trans_log;
776
NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of
777
only transactional tables. If the transaction contain changes to
778
any non-transactional tables, we need to write the transaction and log
784
Doing a commit or a rollback including non-transactional tables,
785
i.e., ending a transaction where we might write the transaction
786
cache to the binary log.
788
We can always end the statement when ending a transaction since
789
transactions are not allowed inside stored functions. If they
790
were, we would have to ensure that we're not ending a statement
791
inside a stored function.
793
thd->binlog_flush_pending_rows_event(true);
795
error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev);
799
We need to step the table map version after writing the
800
transaction cache to disk.
802
mysql_bin_log.update_table_map_version();
803
statistic_increment(binlog_cache_use, &LOCK_status);
804
if (trans_log->disk_writes != 0)
806
statistic_increment(binlog_cache_disk_use, &LOCK_status);
807
trans_log->disk_writes= 0;
813
If rolling back an entire transaction or a single statement not
814
inside a transaction, we reset the transaction cache.
816
If rolling back a statement in a transaction, we truncate the
817
transaction cache to remove the statement.
819
if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
823
assert(!thd->binlog_get_pending_rows_event());
824
thd->clear_binlog_table_maps();
827
trx_data->truncate(trx_data->before_stmt_pos);
830
We need to step the table map version on a rollback to ensure
831
that a new table map event is generated instead of the one that
832
was written to the thrown-away transaction cache.
834
mysql_bin_log.update_table_map_version();
840
static int binlog_prepare(handlerton *hton __attribute__((unused)),
841
THD *thd __attribute__((unused)),
842
bool all __attribute__((unused)))
846
just pretend we can do 2pc, so that MySQL won't
848
real work will be done in DRIZZLE_BIN_LOG::log_xid()
853
/* Map a truth value to the literal "yes" (true) or "no" (false). */
#define YESNO(X) (!(X) ? "no" : "yes")
856
This function is called once after each statement.
858
It has the responsibility to flush the transaction cache to the
859
binlog file on commits.
861
@param hton The binlog handlerton.
862
@param thd The client thread that executes the transaction.
863
@param all This is @c true if this is a real transaction commit, and
866
@see handlerton::commit
868
static int binlog_commit(handlerton *hton __attribute__((unused)),
871
binlog_trx_data *const trx_data=
872
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
874
if (trx_data->empty())
876
// we're here because trans_log was flushed in DRIZZLE_BIN_LOG::log_xid()
882
Decision table for committing a transaction. The top part, the
883
*conditions* represent different cases that can occur, and the
884
bottom part, the *actions*, represent what should be done in that
887
Real transaction 'all' was true
889
Statement in cache There was at least one statement in the
892
In transaction We are inside a transaction
894
Stmt modified non-trans The statement being committed modified a
895
non-transactional table
897
All modified non-trans Some statement before this one in the
898
transaction modified a non-transactional
902
============================= = = = = = = = = = = = = = = = =
903
Real transaction N N N N N N N N N N N N N N N N
904
Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y
905
In transaction N N N N Y Y Y Y N N N N Y Y Y Y
906
Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y
907
All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y
909
Action: (C)ommit/(A)ccumulate C C - C A C - C - - - - A A - A
910
============================= = = = = = = = = = = = = = = = =
913
============================= = = = = = = = = = = = = = = = =
914
Real transaction Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y
915
Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y
916
In transaction N N N N Y Y Y Y N N N N Y Y Y Y
917
Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y
918
All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y
920
(C)ommit/(A)ccumulate/(-) - - - - C C - C - - - - C C - C
921
============================= = = = = = = = = = = = = = = = =
923
In other words, we commit the transaction if and only if either of
924
the following are true:
925
- We are not in a transaction and committing a statement
927
- We are in a transaction and one (or more) of the following are
930
- A full transaction is committed
934
- A non-transactional statement is committed and there is
937
Otherwise, we accumulate the statement
939
uint64_t const in_transaction=
940
thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);
941
if ((in_transaction && (all || (!trx_data->at_least_one_stmt && thd->transaction.stmt.modified_non_trans_table))) || (!in_transaction && !all))
943
Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), true, false);
944
qev.error_code= 0; // see comment in DRIZZLE_LOG::write(THD, IO_CACHE)
945
int error= binlog_end_trans(thd, trx_data, &qev, all);
952
This function is called when a transaction involving a transactional
953
table is rolled back.
955
It has the responsibility to flush the transaction cache to the
956
binlog file. However, if the transaction does not involve
957
non-transactional tables, nothing needs to be logged.
959
@param hton The binlog handlerton.
960
@param thd The client thread that executes the transaction.
961
@param all This is @c true if this is a real transaction rollback, and
964
@see handlerton::rollback
966
static int binlog_rollback(handlerton *hton __attribute__((unused)),
970
binlog_trx_data *const trx_data=
971
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
973
if (trx_data->empty()) {
978
if ((all && thd->transaction.all.modified_non_trans_table) ||
979
(!all && thd->transaction.stmt.modified_non_trans_table) ||
980
(thd->options & OPTION_KEEP_LOG))
983
We write the transaction cache with a rollback last if we have
984
modified any non-transactional table. We do this even if we are
985
committing a single statement that has modified a
986
non-transactional table since it can have modified a
987
transactional table in that statement as well, which needs to be
988
rolled back on the slave.
990
Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), true, false);
991
qev.error_code= 0; // see comment in DRIZZLE_LOG::write(THD, IO_CACHE)
992
error= binlog_end_trans(thd, trx_data, &qev, all);
994
else if ((all && !thd->transaction.all.modified_non_trans_table) ||
995
(!all && !thd->transaction.stmt.modified_non_trans_table))
998
If we have modified only transactional tables, we can truncate
999
the transaction cache without writing anything to the binary
1002
error= binlog_end_trans(thd, trx_data, 0, all);
1009
How do we handle this (unlikely but legal) case:
1011
[transaction] + [update to non-trans table] + [rollback to savepoint] ?
1013
The problem occurs when a savepoint is before the update to the
1014
non-transactional table. Then when there's a rollback to the savepoint, if we
1015
simply truncate the binlog cache, we lose the part of the binlog cache where
1016
the update is. If we want to not lose it, we need to write the SAVEPOINT
1017
command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
1018
is easy: it's just a write at the end of the binlog cache, but the former
1019
should be *inserted* to the place where the user called SAVEPOINT. The
1020
solution is that when the user calls SAVEPOINT, we write it to the binlog
1021
cache (so no need to later insert it). As transactions are never intermixed
1022
in the binary log (i.e. they are serialized), we won't have conflicts with
1023
savepoint names when using mysqlbinlog or in the slave SQL thread.
1024
Then when ROLLBACK TO SAVEPOINT is called, if we updated some
1025
non-transactional table, we don't truncate the binlog cache but instead write
1026
ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
1027
will chop the SAVEPOINT command from the binlog cache, which is good as in
1028
that case there is no need to have it in the binlog).
1031
static int binlog_savepoint_set(handlerton *hton __attribute__((unused)),
1034
binlog_trans_log_savepos(thd, (my_off_t*) sv);
1035
/* Write it to the binary log */
1038
thd->binlog_query(THD::STMT_QUERY_TYPE,
1039
thd->query, thd->query_length, true, false);
1043
static int binlog_savepoint_rollback(handlerton *hton __attribute__((unused)),
1047
Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
1048
non-transactional table. Otherwise, truncate the binlog cache starting
1049
from the SAVEPOINT command.
1051
if (unlikely(thd->transaction.all.modified_non_trans_table ||
1052
(thd->options & OPTION_KEEP_LOG)))
1055
thd->binlog_query(THD::STMT_QUERY_TYPE,
1056
thd->query, thd->query_length, true, false);
1059
binlog_trans_log_truncate(thd, *(my_off_t*)sv);
1064
int check_binlog_magic(IO_CACHE* log, const char** errmsg)
1067
assert(my_b_tell(log) == 0);
1069
if (my_b_read(log, (uchar*) magic, sizeof(magic)))
1071
*errmsg = "I/O error reading the header from the binary log";
1072
sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
1076
if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
1078
*errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
1085
File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
1089
if ((file = my_open(log_file_name, O_RDONLY | O_BINARY | O_SHARE,
1092
sql_print_error("Failed to open log (file '%s', errno %d)",
1093
log_file_name, my_errno);
1094
*errmsg = "Could not open log file";
1097
if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
1098
MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
1100
sql_print_error("Failed to create a cache on log (file '%s')",
1102
*errmsg = "Could not open log file";
1105
if (check_binlog_magic(log,errmsg))
1112
my_close(file,MYF(0));
1120
Find a unique filename for 'filename.#'.
1122
Set '#' to a number as low as possible.
1125
nonzero if not possible to get unique filename
1128
static int find_uniq_filename(char *name)
1132
char buff[FN_REFLEN];
1133
struct st_my_dir *dir_info;
1134
register struct fileinfo *file_info;
1136
size_t buf_length, length;
1139
length= dirname_part(buff, name, &buf_length);
1140
start= name + length;
1144
length= (size_t) (end-start+1);
1146
if (!(dir_info = my_dir(buff,MYF(MY_DONT_SORT))))
1147
{ // This shouldn't happen
1148
stpcpy(end,".1"); // use name+1
1151
file_info= dir_info->dir_entry;
1152
for (i=dir_info->number_off_files ; i-- ; file_info++)
1154
if (memcmp(file_info->name, start, length) == 0 &&
1155
test_if_number(file_info->name+length, &number,0))
1157
set_if_bigger(max_found,(ulong) number);
1160
my_dirend(dir_info);
1163
sprintf(end,"%06ld",max_found+1);
1168
void DRIZZLE_LOG::init(enum_log_type log_type_arg,
1169
enum cache_type io_cache_type_arg)
1171
log_type= log_type_arg;
1172
io_cache_type= io_cache_type_arg;
1178
Open a (new) log file.
1183
log_name The name of the log to open
1184
log_type_arg The type of the log. E.g. LOG_NORMAL
1185
new_name The new name for the logfile. This is only needed
1186
when the method is used to open the binlog file.
1187
io_cache_type_arg The type of the IO_CACHE to use for this log file
1190
Open the logfile, init IO_CACHE and write startup messages
1191
(in case of general and slow query logs).
1198
bool DRIZZLE_LOG::open(const char *log_name, enum_log_type log_type_arg,
1199
const char *new_name, enum cache_type io_cache_type_arg)
1201
char buff[FN_REFLEN];
1203
int open_flags= O_CREAT | O_BINARY;
1207
init(log_type_arg, io_cache_type_arg);
1209
if (!(name= my_strdup(log_name, MYF(MY_WME))))
1211
name= (char *)log_name; // for the error message
1216
stpcpy(log_file_name, new_name);
1217
else if (generate_new_name(log_file_name, name))
1220
if (io_cache_type == SEQ_READ_APPEND)
1221
open_flags |= O_RDWR | O_APPEND;
1223
open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
1227
if ((file= my_open(log_file_name, open_flags,
1228
MYF(MY_WME | ME_WAITTANG))) < 0 ||
1229
init_io_cache(&log_file, file, IO_SIZE, io_cache_type,
1230
my_tell(file, MYF(MY_WME)), 0,
1231
MYF(MY_WME | MY_NABP |
1232
((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
1235
if (log_type == LOG_NORMAL)
1238
int len=snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
1239
"started with:\nTCP Port: %d, Named Pipe: %s\n",
1240
my_progname, server_version, DRIZZLE_COMPILATION_COMMENT,
1243
end= stpncpy(buff + len, "Time Id Command Argument\n",
1244
sizeof(buff) - len);
1245
if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) ||
1246
flush_io_cache(&log_file))
1250
log_state= LOG_OPENED;
1254
sql_print_error("Could not use %s for logging (error %d). \
1255
Turning logging off for the whole duration of the MySQL server process. \
1256
To turn it on again: fix the cause, \
1257
shutdown the MySQL server and restart it.", name, errno);
1259
my_close(file, MYF(0));
1260
end_io_cache(&log_file);
1262
log_state= LOG_CLOSED;
1266
DRIZZLE_LOG::DRIZZLE_LOG()
1267
: name(0), write_error(false), inited(false), log_type(LOG_UNKNOWN),
1268
log_state(LOG_CLOSED)
1271
We don't want to initialize LOCK_Log here as such initialization depends on
1272
safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
1273
called only in main(). Doing initialization here would make it happen
1276
memset(&log_file, 0, sizeof(log_file));
1279
void DRIZZLE_LOG::init_pthread_objects()
1281
assert(inited == 0);
1283
(void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
1291
exiting Bitmask. For the slow and general logs the only used bit is
1292
LOG_CLOSE_TO_BE_OPENED. This is used if we intend to call
1293
open at once after close.
1296
One can do an open on the object at once after doing a close.
1297
The internal structures are not freed until cleanup() is called
1300
void DRIZZLE_LOG::close(uint exiting)
1301
{ // One can't set log_type here!
1302
if (log_state == LOG_OPENED)
1304
end_io_cache(&log_file);
1306
if (my_sync(log_file.file, MYF(MY_WME)) && ! write_error)
1309
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1312
if (my_close(log_file.file, MYF(MY_WME)) && ! write_error)
1315
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1319
log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
1324
/** This is called only once. */
1326
void DRIZZLE_LOG::cleanup()
1331
(void) pthread_mutex_destroy(&LOCK_log);
1338
int DRIZZLE_LOG::generate_new_name(char *new_name, const char *log_name)
1340
fn_format(new_name, log_name, mysql_data_home, "", 4);
1341
if (log_type == LOG_BIN)
1343
if (!fn_ext(log_name)[0])
1345
if (find_uniq_filename(new_name))
1347
sql_print_error(ER(ER_NO_UNIQUE_LOGFILE), log_name);
1363
Reopen the log file. The method is used during FLUSH LOGS
1364
and locks LOCK_log mutex
1368
void DRIZZLE_QUERY_LOG::reopen_file()
1377
pthread_mutex_lock(&LOCK_log);
1380
name= 0; // Don't free name
1381
close(LOG_CLOSE_TO_BE_OPENED);
1384
Note that at this point, log_state != LOG_CLOSED (important for is_open()).
1387
open(save_name, log_type, 0, io_cache_type);
1388
my_free(save_name, MYF(0));
1390
pthread_mutex_unlock(&LOCK_log);
1397
Write a command to traditional general log file
1402
event_time command start timestamp
1403
user_host the pointer to the string with user@host info
1404
user_host_len length of the user_host string. this is computed once
1405
and passed to all general log event handlers
1406
thread_id Id of the thread that issued the query
1407
command_type the type of the command being logged
1408
command_type_len the length of the string above
1409
sql_text the very text of the query being executed
1410
sql_text_len the length of sql_text string
1414
Log given command to normal (not rotatable) log file
1418
TRUE - error occurred
1421
bool DRIZZLE_QUERY_LOG::write(time_t event_time,
1422
const char *user_host __attribute__((unused)),
1423
uint user_host_len __attribute__((unused)),
1425
const char *command_type, uint command_type_len,
1426
const char *sql_text, uint sql_text_len)
1430
char local_time_buff[MAX_TIME_SIZE];
1432
uint time_buff_len= 0;
1434
(void) pthread_mutex_lock(&LOCK_log);
1436
/* Test if someone closed between the is_open test and lock */
1439
/* Note that my_b_write() assumes it knows the length for this */
1440
if (event_time != last_time)
1442
last_time= event_time;
1444
localtime_r(&event_time, &start);
1446
time_buff_len= snprintf(local_time_buff, MAX_TIME_SIZE,
1447
"%02d%02d%02d %2d:%02d:%02d",
1448
start.tm_year % 100, start.tm_mon + 1,
1449
start.tm_mday, start.tm_hour,
1450
start.tm_min, start.tm_sec);
1452
if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
1456
if (my_b_write(&log_file, (uchar*) "\t\t" ,2) < 0)
1459
/* command_type, thread_id */
1460
length= snprintf(buff, 32, "%5ld ", (long) thread_id);
1462
if (my_b_write(&log_file, (uchar*) buff, length))
1465
if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
1468
if (my_b_write(&log_file, (uchar*) "\t", 1))
1472
if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
1475
if (my_b_write(&log_file, (uchar*) "\n", 1) ||
1476
flush_io_cache(&log_file))
1480
(void) pthread_mutex_unlock(&LOCK_log);
1487
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
1489
(void) pthread_mutex_unlock(&LOCK_log);
1495
Log a query to the traditional slow log file
1500
thd THD of the query
1501
current_time current timestamp
1502
query_start_arg command start timestamp
1503
user_host the pointer to the string with user@host info
1504
user_host_len length of the user_host string. this is computed once
1505
and passed to all general log event handlers
1506
query_utime Amount of time the query took to execute (in microseconds)
1507
lock_utime Amount of time the query was locked (in microseconds)
1508
is_command The flag, which determines, whether the sql_text is a
1509
query or an administrator command.
1510
sql_text the very text of the query or administrator command
1512
sql_text_len the length of sql_text string
1516
Log a query to the slow log file.
1520
TRUE - error occurred
1523
bool DRIZZLE_QUERY_LOG::write(THD *thd, time_t current_time,
1524
time_t query_start_arg __attribute__((unused)),
1525
const char *user_host,
1526
uint user_host_len, uint64_t query_utime,
1527
uint64_t lock_utime, bool is_command,
1528
const char *sql_text, uint sql_text_len)
1532
(void) pthread_mutex_lock(&LOCK_log);
1536
(void) pthread_mutex_unlock(&LOCK_log);
1541
{ // Safety against reopen
1543
char buff[80], *end;
1544
char query_time_buff[22+7], lock_time_buff[22+7];
1548
if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
1550
if (current_time != last_time)
1552
last_time= current_time;
1554
localtime_r(&current_time, &start);
1556
buff_len= snprintf(buff, sizeof buff,
1557
"# Time: %02d%02d%02d %2d:%02d:%02d\n",
1558
start.tm_year % 100, start.tm_mon + 1,
1559
start.tm_mday, start.tm_hour,
1560
start.tm_min, start.tm_sec);
1562
/* Note that my_b_write() assumes it knows the length for this */
1563
if (my_b_write(&log_file, (uchar*) buff, buff_len))
1566
const uchar uh[]= "# User@Host: ";
1567
if (my_b_write(&log_file, uh, sizeof(uh) - 1))
1569
if (my_b_write(&log_file, (uchar*) user_host, user_host_len))
1571
if (my_b_write(&log_file, (uchar*) "\n", 1))
1574
/* For slow query log */
1575
sprintf(query_time_buff, "%.6f", uint64_t2double(query_utime)/1000000.0);
1576
sprintf(lock_time_buff, "%.6f", uint64_t2double(lock_utime)/1000000.0);
1577
if (my_b_printf(&log_file,
1578
"# Query_time: %s Lock_time: %s"
1579
" Rows_sent: %lu Rows_examined: %lu\n",
1580
query_time_buff, lock_time_buff,
1581
(ulong) thd->sent_row_count,
1582
(ulong) thd->examined_row_count) == (uint) -1)
1584
if (thd->db && strcmp(thd->db, db))
1585
{ // Database changed
1586
if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1)
1590
if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
1592
end=stpcpy(end, ",last_insert_id=");
1593
end=int64_t10_to_str((int64_t)
1594
thd->first_successful_insert_id_in_prev_stmt_for_binlog,
1597
// Save value if we do an insert.
1598
if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
1600
if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
1602
end=stpcpy(end,",insert_id=");
1603
end=int64_t10_to_str((int64_t)
1604
thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
1610
This info used to show up randomly, depending on whether the query
1611
checked the query start time or not. now we always write current
1612
timestamp to the slow log
1614
end= stpcpy(end, ",timestamp=");
1615
end= int10_to_str((long) current_time, end, 10);
1621
if (my_b_write(&log_file, (uchar*) "SET ", 4) ||
1622
my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff)))
1627
end= strxmov(buff, "# administrator command: ", NullS);
1628
buff_len= (ulong) (end - buff);
1629
my_b_write(&log_file, (uchar*) buff, buff_len);
1631
if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) ||
1632
my_b_write(&log_file, (uchar*) ";\n",2) ||
1633
flush_io_cache(&log_file))
1641
sql_print_error(ER(ER_ERROR_ON_WRITE), name, error);
1645
(void) pthread_mutex_unlock(&LOCK_log);
1652
The following should be using fn_format(); We just need to
1653
first change fn_format() to cut the file name if it's too long.
1655
const char *DRIZZLE_LOG::generate_name(const char *log_name,
1657
bool strip_ext, char *buff)
1659
if (!log_name || !log_name[0])
1661
strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
1662
return (const char *)
1663
fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
1665
// get rid of extension if the log is binary to avoid problems
1668
char *p= fn_ext(log_name);
1669
uint length= (uint) (p - log_name);
1670
strmake(buff, log_name, min(length, (uint)FN_REFLEN));
1671
return (const char*)buff;
1678
DRIZZLE_BIN_LOG::DRIZZLE_BIN_LOG()
1679
:bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
1680
need_start_event(true), m_table_map_version(0),
1681
description_event_for_exec(0), description_event_for_queue(0)
1684
We don't want to initialize locks here as such initialization depends on
1685
safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
1686
called only in main(). Doing initialization here would make it happen
1689
index_file_name[0] = 0;
1690
memset(&index_file, 0, sizeof(index_file));
1693
/* this is called only once */
1695
/**
  Shut the binary log down and release everything it owns.

  Closes the log (index file included), frees both format description
  events and destroys the two mutexes and the condition variable.
*/
void DRIZZLE_BIN_LOG::cleanup()
1700
/* Close the index file as well; LOG_CLOSE_STOP_EVENT is also requested. */
close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
1701
/* The description events are owned by this object; delete on NULL is a no-op. */
delete description_event_for_queue;
1702
delete description_event_for_exec;
1703
/* Destroy the primitives; results are deliberately discarded. */
(void) pthread_mutex_destroy(&LOCK_log);
1704
(void) pthread_mutex_destroy(&LOCK_index);
1705
(void) pthread_cond_destroy(&update_cond);
1711
/* Init binlog-specific vars */
1712
/**
  Store the binlog-specific settings in the object.

  @param no_auto_events_arg  Copied to the no_auto_events member.
  @param max_size_arg        Copied to the max_size member.
*/
void DRIZZLE_BIN_LOG::init(bool no_auto_events_arg, ulong max_size_arg)
1714
no_auto_events= no_auto_events_arg;
1715
max_size= max_size_arg;
1720
/**
  Create the mutexes and the condition variable used by the binary log.

  Must only be called while inited is still 0 (asserted below).
*/
void DRIZZLE_BIN_LOG::init_pthread_objects()
1722
assert(inited == 0);
1724
/* Both mutexes use the MY_MUTEX_INIT_SLOW attribute set. */
(void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
1725
(void) pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW);
1726
/* A null attribute argument selects the default condition attributes. */
(void) pthread_cond_init(&update_cond, 0);
1730
bool DRIZZLE_BIN_LOG::open_index_file(const char *index_file_name_arg,
1731
const char *log_name)
1733
File index_file_nr= -1;
1734
assert(!my_b_inited(&index_file));
1737
First open of this class instance
1738
Create an index file that will hold all file names used for logging.
1739
Add new entries to the end of it.
1741
myf opt= MY_UNPACK_FILENAME;
1742
if (!index_file_name_arg)
1744
index_file_name_arg= log_name; // Use same basename for index file
1745
opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
1747
fn_format(index_file_name, index_file_name_arg, mysql_data_home,
1749
if ((index_file_nr= my_open(index_file_name,
1750
O_RDWR | O_CREAT | O_BINARY ,
1751
MYF(MY_WME))) < 0 ||
1752
my_sync(index_file_nr, MYF(MY_WME)) ||
1753
init_io_cache(&index_file, index_file_nr,
1754
IO_SIZE, WRITE_CACHE,
1755
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
1756
0, MYF(MY_WME | MY_WAIT_IF_FULL)))
1759
TODO: all operations creating/deleting the index file or a log, should
1760
call my_sync_dir() or my_sync_dir_by_file() to be durable.
1761
TODO: file creation should be done with my_create() not my_open().
1763
if (index_file_nr >= 0)
1764
my_close(index_file_nr,MYF(0));
1772
Open a (new) binlog file.
1774
- Open the log file and the index file. Register the new
1776
- When calling this when the file is in use, you must hold locks
1777
on LOCK_log and LOCK_index.
1785
bool DRIZZLE_BIN_LOG::open(const char *log_name,
1786
enum_log_type log_type_arg,
1787
const char *new_name,
1788
enum cache_type io_cache_type_arg,
1789
bool no_auto_events_arg,
1791
bool null_created_arg)
1797
/* open the main log file */
1798
if (DRIZZLE_LOG::open(log_name, log_type_arg, new_name, io_cache_type_arg))
1799
return(1); /* all warnings issued */
1801
init(no_auto_events_arg, max_size_arg);
1805
assert(log_type == LOG_BIN);
1808
bool write_file_name_to_index_file=0;
1810
if (!my_b_filelength(&log_file))
1813
The binary log file was empty (probably newly created)
1814
This is the normal case and happens when the user doesn't specify
1815
an extension for the binary log files.
1816
In this case we write a standard header to it.
1818
if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
1819
BIN_LOG_HEADER_SIZE))
1821
bytes_written+= BIN_LOG_HEADER_SIZE;
1822
write_file_name_to_index_file= 1;
1825
assert(my_b_inited(&index_file) != 0);
1826
reinit_io_cache(&index_file, WRITE_CACHE,
1827
my_b_filelength(&index_file), 0, 0);
1828
if (need_start_event && !no_auto_events)
1831
In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event
1832
even if this is not the very first binlog.
1834
Format_description_log_event s(BINLOG_VERSION);
1836
don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
1837
as we won't be able to reset it later
1839
if (io_cache_type == WRITE_CACHE)
1840
s.flags|= LOG_EVENT_BINLOG_IN_USE_F;
1843
s.dont_set_created= null_created_arg;
1844
if (s.write(&log_file))
1846
bytes_written+= s.data_written;
1848
if (description_event_for_queue &&
1849
description_event_for_queue->binlog_version>=4)
1852
This is a relay log written to by the I/O slave thread.
1853
Write the event so that others can later know the format of this relay
1855
Note that this event is very close to the original event from the
1856
master (it has binlog version of the master, event types of the
1857
master), so this is suitable to parse the next relay log's event. It
1858
has been produced by
1859
Format_description_log_event::Format_description_log_event(char* buf,).
1860
Why don't we want to write the description_event_for_queue if this
1861
event is for format<4 (3.23 or 4.x): this is because in that case, the
1862
description_event_for_queue describes the data received from the
1863
master, but not the data written to the relay log (*conversion*),
1864
which is in format 4 (slave's).
1867
Set 'created' to 0, so that in next relay logs this event does not
1868
trigger cleaning actions on the slave in
1869
Format_description_log_event::apply_event_impl().
1871
description_event_for_queue->created= 0;
1872
/* Don't set log_pos in event header */
1873
description_event_for_queue->artificial_event=1;
1875
if (description_event_for_queue->write(&log_file))
1877
bytes_written+= description_event_for_queue->data_written;
1879
if (flush_io_cache(&log_file) ||
1880
my_sync(log_file.file, MYF(MY_WME)))
1883
if (write_file_name_to_index_file)
1886
As this is a new log file, we write the file name to the index
1887
file. As every time we write to the index file, we sync it.
1889
if (my_b_write(&index_file, (uchar*) log_file_name,
1890
strlen(log_file_name)) ||
1891
my_b_write(&index_file, (uchar*) "\n", 1) ||
1892
flush_io_cache(&index_file) ||
1893
my_sync(index_file.file, MYF(MY_WME)))
1897
log_state= LOG_OPENED;
1902
sql_print_error("Could not use %s for logging (error %d). \
1903
Turning logging off for the whole duration of the MySQL server process. \
1904
To turn it on again: fix the cause, \
1905
shutdown the MySQL server and restart it.", name, errno);
1907
my_close(file,MYF(0));
1908
end_io_cache(&log_file);
1909
end_io_cache(&index_file);
1911
log_state= LOG_CLOSED;
1916
/**
  Locking wrapper around raw_get_current_log().

  @param linfo  Passed through to raw_get_current_log(), which fills it in.
*/
int DRIZZLE_BIN_LOG::get_current_log(LOG_INFO* linfo)
1918
/* LOCK_log is held for the duration of the raw call. */
pthread_mutex_lock(&LOCK_log);
1919
int ret = raw_get_current_log(linfo);
1920
pthread_mutex_unlock(&LOCK_log);
1924
/**
  Copy the current log file name and write position into linfo.

  No lock is taken here; get_current_log() calls this with LOCK_log held.

  @param linfo  Receives log_file_name and the position from my_b_tell().
*/
int DRIZZLE_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
1926
/* Copy at most sizeof(linfo->log_file_name)-1 characters of the name. */
strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
1927
linfo->pos = my_b_tell(&log_file);
1932
Move all data up within a filename index file.
1934
We do the copy outside of the IO_CACHE as the cache buffers would just
1935
make things slower and more complicated.
1936
In most cases the copy loop should only do one read.
1938
@param index_file File to move
1939
@param offset Move everything from here to beginning
1942
File will be truncated to be 'offset' shorter or filled up with newlines
1948
#ifdef HAVE_REPLICATION
1950
static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
1953
my_off_t init_offset= offset;
1954
File file= index_file->file;
1955
uchar io_buf[IO_SIZE*2];
1957
for (;; offset+= bytes_read)
1959
(void) my_seek(file, offset, MY_SEEK_SET, MYF(0));
1960
if ((bytes_read= (int) my_read(file, io_buf, sizeof(io_buf), MYF(MY_WME)))
1964
break; // end of file
1965
(void) my_seek(file, offset-init_offset, MY_SEEK_SET, MYF(0));
1966
if (my_write(file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
1969
/* The following will either truncate the file or fill the end with '\n' */
1970
if (ftruncate(file, offset - init_offset) || my_sync(file, MYF(MY_WME)))
1973
/* Reset data in old index cache */
1974
reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
1981
#endif /* HAVE_REPLICATION */
1984
Find the position in the log-index-file for the given log name.
1986
@param linfo Store here the found log file name and position to
1987
the NEXT log file name in the index file.
1988
@param log_name Filename to find in the index file.
1989
Is a null pointer if we want to read the first entry
1990
@param need_lock Set this to 1 if the parent doesn't already have a
1994
On systems without the truncate function the file will end with one or
1995
more empty lines. These will be ignored when reading the file.
2000
LOG_INFO_EOF End of log-index-file found
2002
LOG_INFO_IO Got IO error while reading file
2005
int DRIZZLE_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
2009
char *fname= linfo->log_file_name;
2010
uint log_name_len= log_name ? (uint) strlen(log_name) : 0;
2013
Mutex needed because we need to make sure the file pointer does not
2014
move from under our feet
2017
pthread_mutex_lock(&LOCK_index);
2018
safe_mutex_assert_owner(&LOCK_index);
2020
/* As the file is flushed, we can't get an error here */
2021
(void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
2026
my_off_t offset= my_b_tell(&index_file);
2027
/* If we get 0 or 1 characters, this is the end of the file */
2029
if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
2031
/* Did not find the given entry; Return not found or error */
2032
error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
2036
// if the log entry matches, null string matching anything
2038
(log_name_len == length-1 && fname[log_name_len] == '\n' &&
2039
!memcmp(fname, log_name, log_name_len)))
2041
fname[length-1]=0; // remove last \n
2042
linfo->index_file_start_offset= offset;
2043
linfo->index_file_offset = my_b_tell(&index_file);
2049
pthread_mutex_unlock(&LOCK_index);
2055
Find the position in the log-index-file for the given log name.
2058
linfo Store here the next log file name and position to
2059
the file name after that.
2061
need_lock Set this to 1 if the parent doesn't already have a
2065
- Before calling this function, one has to call find_log_pos()
2067
- Mutex needed because we need to make sure the file pointer does not move
2073
LOG_INFO_EOF End of log-index-file found
2075
LOG_INFO_IO Got IO error while reading file
2078
int DRIZZLE_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
2082
char *fname= linfo->log_file_name;
2085
pthread_mutex_lock(&LOCK_index);
2086
safe_mutex_assert_owner(&LOCK_index);
2088
/* As the file is flushed, we can't get an error here */
2089
(void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
2092
linfo->index_file_start_offset= linfo->index_file_offset;
2093
if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
2095
error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
2098
fname[length-1]=0; // kill \n
2099
linfo->index_file_offset = my_b_tell(&index_file);
2103
pthread_mutex_unlock(&LOCK_index);
2109
Delete all logs referred to in the index file.
2110
Start writing to a new log file.
2112
The new index file will only contain this file.
2117
If not called from slave thread, write start event to new log
2125
bool DRIZZLE_BIN_LOG::reset_logs(THD* thd)
2129
const char* save_name;
2133
We need to get both locks to be sure that no one is trying to
2134
write to the index log file.
2136
pthread_mutex_lock(&LOCK_log);
2137
pthread_mutex_lock(&LOCK_index);
2140
The following mutex is needed to ensure that no threads call
2141
'delete thd' as we would then risk missing a 'rollback' from this
2142
thread. If the transaction involved MyISAM tables, it should go
2143
into binlog even on rollback.
2145
VOID(pthread_mutex_lock(&LOCK_thread_count));
2147
/* Save variables so that we can reopen the log */
2149
name=0; // Protect against free
2150
close(LOG_CLOSE_TO_BE_OPENED);
2152
/* First delete all old log files */
2154
if (find_log_pos(&linfo, NullS, 0))
2162
if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
2164
if (my_errno == ENOENT)
2166
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_WARN,
2167
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2168
linfo.log_file_name);
2169
sql_print_information("Failed to delete file '%s'",
2170
linfo.log_file_name);
2176
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
2177
ER_BINLOG_PURGE_FATAL_ERR,
2178
"a problem with deleting %s; "
2179
"consider examining correspondence "
2180
"of your binlog index file "
2181
"to the actual binlog files",
2182
linfo.log_file_name);
2187
if (find_next_log(&linfo, 0))
2191
/* Start logging with a new file */
2192
close(LOG_CLOSE_INDEX);
2193
if ((error= my_delete_allow_opened(index_file_name, MYF(0)))) // Reset (open will update)
2195
if (my_errno == ENOENT)
2197
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_WARN,
2198
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2200
sql_print_information("Failed to delete file '%s'",
2207
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
2208
ER_BINLOG_PURGE_FATAL_ERR,
2209
"a problem with deleting %s; "
2210
"consider examining correspondence "
2211
"of your binlog index file "
2212
"to the actual binlog files",
2218
if (!thd->slave_thread)
2220
if (!open_index_file(index_file_name, 0))
2221
open(save_name, log_type, 0, io_cache_type, no_auto_events, max_size, 0);
2222
my_free((uchar*) save_name, MYF(0));
2225
VOID(pthread_mutex_unlock(&LOCK_thread_count));
2226
pthread_mutex_unlock(&LOCK_index);
2227
pthread_mutex_unlock(&LOCK_log);
2233
Delete relay log files prior to rli->group_relay_log_name
2234
(i.e. all logs which are not involved in a non-finished group
2235
(transaction)), remove them from the index file and start on next
2239
- Protects index file with LOCK_index
2240
- Delete relevant relay log files
2241
- Copy all file names after these ones to the front of the index file
2242
- If the OS has truncate, truncate the file, else fill it with '\n'
2243
- Read the next file name from the index file and store in rli->linfo
2245
@param rli Relay log information
2246
@param included If false, all relay logs that are strictly before
2247
rli->group_relay_log_name are deleted ; if true, the
2248
latter is deleted too (i.e. all relay logs
2249
read by the SQL slave thread are deleted).
2252
- This is only called from the slave-execute thread when it has read
2253
all commands from a relay log and wants to switch to a new relay log.
2254
- When this happens, we can be in an active transaction as
2255
a transaction can span over two relay logs
2256
(although it is always written as a single block to the master's binary
2257
log, hence cannot span over two master's binary logs).
2262
LOG_INFO_EOF End of log-index-file found
2264
LOG_INFO_SEEK Could not allocate IO cache
2266
LOG_INFO_IO Got IO error while reading file
2269
#ifdef HAVE_REPLICATION
2271
int DRIZZLE_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
2276
assert(rli->slave_running == 1);
2277
assert(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
2279
pthread_mutex_lock(&LOCK_index);
2280
pthread_mutex_lock(&rli->log_space_lock);
2281
rli->relay_log.purge_logs(rli->group_relay_log_name, included,
2282
0, 0, &rli->log_space_total);
2283
// Tell the I/O thread to take the relay_log_space_limit into account
2284
rli->ignore_log_space_limit= 0;
2285
pthread_mutex_unlock(&rli->log_space_lock);
2288
Ok to broadcast after the critical region as there is no risk of
2289
the mutex being destroyed by this thread later - this helps save
2292
pthread_cond_broadcast(&rli->log_space_cond);
2295
Read the next log file name from the index file and pass it back to
2297
If included is true, we want the first relay log;
2298
otherwise we want the one after event_relay_log_name.
2300
if ((included && (error=find_log_pos(&rli->linfo, NullS, 0))) ||
2302
((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
2303
(error=find_next_log(&rli->linfo, 0)))))
2306
sql_print_error("next log error: %d offset: %s log: %s included: %d",
2308
llstr(rli->linfo.index_file_offset,buff),
2309
rli->group_relay_log_name,
2315
Reset rli's coordinates to the current log.
2317
rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
2318
strmake(rli->event_relay_log_name,rli->linfo.log_file_name,
2319
sizeof(rli->event_relay_log_name)-1);
2322
If we removed the rli->group_relay_log_name file,
2323
we must update the rli->group* coordinates, otherwise do not touch it as the
2324
group's execution is not finished (e.g. COMMIT not executed)
2328
rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
2329
strmake(rli->group_relay_log_name,rli->linfo.log_file_name,
2330
sizeof(rli->group_relay_log_name)-1);
2331
rli->notify_group_relay_log_name_update();
2334
/* Store where we are in the new file for the execution thread */
2335
flush_relay_log_info(rli);
2338
pthread_mutex_unlock(&LOCK_index);
2343
Update log index_file.
2346
/**
  Drop the leading part of the index file, up to the entry in log_info.

  @param log_info             Its index_file_start_offset is the offset
                              passed to copy_up_file_and_fill().
  @param need_update_threads  If true, adjust_linfo_offsets() is called
                              with the same offset.
*/
int DRIZZLE_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
2348
/* A non-zero result from copy_up_file_and_fill() is treated as failure. */
if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
2351
// now update offsets in index file for running threads
2352
if (need_update_threads)
2353
adjust_linfo_offsets(log_info->index_file_start_offset);
2358
Remove all logs before the given log from disk and from the index file.
2360
@param to_log Delete all log files before this file.
2361
@param included If true, to_log is deleted too.
2363
@param need_update_threads If we want to update the log coordinates of
2364
all threads. False for relay logs, true otherwise.
2365
@param decrease_log_space If not null, decrement this variable by
2366
the amount of log space freed
2369
If any of the logs before the deleted one is in use,
2370
only purge logs up to this one.
2375
LOG_INFO_EOF to_log not found
2376
LOG_INFO_EMFILE too many files opened
2377
LOG_INFO_FATAL if any other than ENOENT error from
2378
stat() or my_delete()
2381
int DRIZZLE_BIN_LOG::purge_logs(const char *to_log,
2384
bool need_update_threads,
2385
uint64_t *decrease_log_space)
2393
pthread_mutex_lock(&LOCK_index);
2394
if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
2398
File name exists in index file; delete until we find this file
2399
or a file that is used.
2401
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
2403
while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
2404
!log_in_use(log_info.log_file_name))
2407
if (stat(log_info.log_file_name, &s))
2409
if (errno == ENOENT)
2412
It's not fatal if we can't stat a log file that does not exist;
2413
If we could not stat, we won't delete.
2415
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_WARN,
2416
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2417
log_info.log_file_name);
2418
sql_print_information("Failed to execute stat on file '%s'",
2419
log_info.log_file_name);
2425
Other than ENOENT are fatal
2427
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
2428
ER_BINLOG_PURGE_FATAL_ERR,
2429
"a problem with getting info on being purged %s; "
2430
"consider examining correspondence "
2431
"of your binlog index file "
2432
"to the actual binlog files",
2433
log_info.log_file_name);
2434
error= LOG_INFO_FATAL;
2440
if (!my_delete(log_info.log_file_name, MYF(0)))
2442
if (decrease_log_space)
2443
*decrease_log_space-= s.st_size;
2447
if (my_errno == ENOENT)
2449
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_WARN,
2450
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2451
log_info.log_file_name);
2452
sql_print_information("Failed to delete file '%s'",
2453
log_info.log_file_name);
2458
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
2459
ER_BINLOG_PURGE_FATAL_ERR,
2460
"a problem with deleting %s; "
2461
"consider examining correspondence "
2462
"of your binlog index file "
2463
"to the actual binlog files",
2464
log_info.log_file_name);
2465
if (my_errno == EMFILE)
2467
error= LOG_INFO_EMFILE;
2469
error= LOG_INFO_FATAL;
2475
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
2477
if (find_next_log(&log_info, 0) || exit_loop)
2482
If we get killed -9 here, the sysadmin would have to edit
2483
the log index file after restart - otherwise, this should be safe
2485
error= update_log_index(&log_info, need_update_threads);
2492
pthread_mutex_unlock(&LOCK_index);
2497
Remove all logs before the given file date from disk and from the
2500
@param thd Thread pointer
2501
@param purge_time Delete all log files before given date.
2504
If any of the logs before the deleted one is in use,
2505
only purge logs up to this one.
2510
LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
2511
LOG_INFO_FATAL if any other than ENOENT error from
2512
stat() or my_delete()
2515
int DRIZZLE_BIN_LOG::purge_logs_before_date(time_t purge_time)
2519
struct stat stat_area;
2521
pthread_mutex_lock(&LOCK_index);
2524
Delete until we find the current file
2525
or a file that is used or a file
2526
that is older than purge_time.
2528
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
2531
while (strcmp(log_file_name, log_info.log_file_name) &&
2532
!log_in_use(log_info.log_file_name))
2534
if (stat(log_info.log_file_name, &stat_area))
2536
if (errno == ENOENT)
2539
It's not fatal if we can't stat a log file that does not exist.
2541
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_WARN,
2542
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2543
log_info.log_file_name);
2544
sql_print_information("Failed to execute stat on file '%s'",
2545
log_info.log_file_name);
2551
Other than ENOENT are fatal
2553
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
2554
ER_BINLOG_PURGE_FATAL_ERR,
2555
"a problem with getting info on being purged %s; "
2556
"consider examining correspondence "
2557
"of your binlog index file "
2558
"to the actual binlog files",
2559
log_info.log_file_name);
2560
error= LOG_INFO_FATAL;
2566
if (stat_area.st_mtime >= purge_time)
2568
if (my_delete(log_info.log_file_name, MYF(0)))
2570
if (my_errno == ENOENT)
2572
/* It's not fatal even if we can't delete a log file */
2573
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_WARN,
2574
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
2575
log_info.log_file_name);
2576
sql_print_information("Failed to delete file '%s'",
2577
log_info.log_file_name);
2582
push_warning_printf(current_thd, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
2583
ER_BINLOG_PURGE_FATAL_ERR,
2584
"a problem with deleting %s; "
2585
"consider examining correspondence "
2586
"of your binlog index file "
2587
"to the actual binlog files",
2588
log_info.log_file_name);
2589
error= LOG_INFO_FATAL;
2593
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
2595
if (find_next_log(&log_info, 0))
2600
If we get killed -9 here, the sysadmin would have to edit
2601
the log index file after restart - otherwise, this should be safe
2603
error= update_log_index(&log_info, 1);
2606
pthread_mutex_unlock(&LOCK_index);
2609
#endif /* HAVE_REPLICATION */
2613
Create a new log file name.
2615
@param buf buf of at least FN_REFLEN where new name is stored
2618
If file name will be longer than FN_REFLEN it will be truncated
2621
/**
  Build a log file name in buf from the leading part of the current
  log_file_name followed by log_ident.

  @param buf        Output buffer.
  @param log_ident  Text appended after the copied prefix.
*/
void DRIZZLE_BIN_LOG::make_log_name(char* buf, const char* log_ident)
2623
/* NOTE(review): dirname_length() presumably returns the length of the
   directory prefix, including its trailing separator - confirm. */
uint dir_len = dirname_length(log_file_name);
2624
/* Clamp the prefix length so it cannot reach FN_REFLEN. */
if (dir_len >= FN_REFLEN)
2625
dir_len=FN_REFLEN-1;
2626
/* Copy the first dir_len characters of log_file_name into buf. */
stpncpy(buf, log_file_name, dir_len);
2627
/* Append log_ident, limited to the FN_REFLEN - dir_len - 1 remaining
   characters. */
strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
2632
Check if we are writing/reading to the given log file.
2635
/**
  Tell whether the given name is the log file currently in use.

  @param log_file_name_arg  Name to compare; must not be NULL because it
                            is passed straight to strcmp().

  @return true if log_file_name_arg equals the log_file_name member
          (exact strcmp() match), false otherwise.
*/
bool DRIZZLE_BIN_LOG::is_active(const char *log_file_name_arg)
2637
return !strcmp(log_file_name, log_file_name_arg);
2642
Wrappers around new_file_impl to avoid using argument
2643
to control locking. The argument 1) less readable 2) breaks
2644
encapsulation 3) allows external access to the class without
2645
a lock (which is not possible with private new_file_without_locking
2649
void DRIZZLE_BIN_LOG::new_file()
2655
void DRIZZLE_BIN_LOG::new_file_without_locking()
2662
Start writing to a new log file or reopen the old file.
2664
@param need_lock Set to 1 if caller has not locked LOCK_log
2667
The new file name is stored last in the index file
2670
void DRIZZLE_BIN_LOG::new_file_impl(bool need_lock)
2672
char new_name[FN_REFLEN], *new_name_ptr, *old_name;
2680
pthread_mutex_lock(&LOCK_log);
2681
pthread_mutex_lock(&LOCK_index);
2683
safe_mutex_assert_owner(&LOCK_log);
2684
safe_mutex_assert_owner(&LOCK_index);
2687
if binlog is used as tc log, be sure all xids are "unlogged",
2688
so that on recover we only need to scan one - latest - binlog file
2689
for prepared xids. As this is expected to be a rare event,
2690
simple wait strategy is enough. We're locking LOCK_log to be sure no
2691
new Xid_log_event's are added to the log (and prepared_xids is not
2692
increased), and waiting on COND_prep_xids for late threads to
2697
tc_log_page_waits++;
2698
pthread_mutex_lock(&LOCK_prep_xids);
2699
while (prepared_xids) {
2700
pthread_cond_wait(&COND_prep_xids, &LOCK_prep_xids);
2702
pthread_mutex_unlock(&LOCK_prep_xids);
2705
/* Reuse old name if not binlog and not update log */
2709
If user hasn't specified an extension, generate a new log name
2710
We have to do this here and not in open as we want to store the
2711
new file name in the current binary log file.
2713
if (generate_new_name(new_name, name))
2715
new_name_ptr=new_name;
2717
if (log_type == LOG_BIN)
2719
if (!no_auto_events)
2722
We log the whole file name for log file as the user may decide
2723
to change base names at some point.
2725
Rotate_log_event r(new_name+dirname_length(new_name),
2726
0, LOG_EVENT_OFFSET, 0);
2728
bytes_written += r.data_written;
2731
Update needs to be signalled even if there is no rotate event
2732
log rotation should give the waiting thread a signal to
2733
discover EOF and move on to the next log.
2738
name=0; // Don't free name
2739
close(LOG_CLOSE_TO_BE_OPENED);
2742
Note that at this point, log_state != LOG_CLOSED (important for is_open()).
2746
new_file() is only used for rotation (in FLUSH LOGS or because size >
2747
max_binlog_size or max_relay_log_size).
2748
If this is a binary log, the Format_description_log_event at the beginning of
2749
the new file should have created=0 (to distinguish with the
2750
Format_description_log_event written at server startup, which should
2751
trigger temp tables deletion on slaves.
2754
open(old_name, log_type, new_name_ptr,
2755
io_cache_type, no_auto_events, max_size, 1);
2756
my_free(old_name,MYF(0));
2760
pthread_mutex_unlock(&LOCK_log);
2761
pthread_mutex_unlock(&LOCK_index);
2767
bool DRIZZLE_BIN_LOG::append(Log_event* ev)
2770
pthread_mutex_lock(&LOCK_log);
2772
assert(log_file.type == SEQ_READ_APPEND);
2774
Log_event::write() is smart enough to use my_b_write() or
2775
my_b_append() depending on the kind of cache we have.
2777
if (ev->write(&log_file))
2782
bytes_written+= ev->data_written;
2783
if ((uint) my_b_append_tell(&log_file) > max_size)
2784
new_file_without_locking();
2787
pthread_mutex_unlock(&LOCK_log);
2788
signal_update(); // Safe as we don't call close
2793
bool DRIZZLE_BIN_LOG::appendv(const char* buf, uint len,...)
2799
assert(log_file.type == SEQ_READ_APPEND);
2801
safe_mutex_assert_owner(&LOCK_log);
2804
if (my_b_append(&log_file,(uchar*) buf,len))
2809
bytes_written += len;
2810
} while ((buf=va_arg(args,const char*)) && (len=va_arg(args,uint)));
2811
if ((uint) my_b_append_tell(&log_file) > max_size)
2812
new_file_without_locking();
2821
bool DRIZZLE_BIN_LOG::flush_and_sync()
2823
int err=0, fd=log_file.file;
2824
safe_mutex_assert_owner(&LOCK_log);
2825
if (flush_io_cache(&log_file))
2827
if (++sync_binlog_counter >= sync_binlog_period && sync_binlog_period)
2829
sync_binlog_counter= 0;
2830
err=my_sync(fd, MYF(MY_WME));
2835
/**
  Start a union of binlog events for a thread.

  Resets the thread's binlog_evt_union bookkeeping and remembers the
  query id at which the union begins.

  @param thd             Thread whose union state is set up; it must not
                         already be in a union (asserted).
  @param query_id_param  Stored as the first query id of the union.
*/
void DRIZZLE_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
{
  assert(!thd->binlog_evt_union.do_union);
  thd->binlog_evt_union.do_union= true;
  thd->binlog_evt_union.unioned_events= false;
  thd->binlog_evt_union.unioned_events_trans= false;
  thd->binlog_evt_union.first_query_id= query_id_param;
}
2844
/**
  End the union of binlog events for a thread.

  @param thd  Thread that is currently in a union (asserted); its
              do_union flag is cleared.
*/
void DRIZZLE_BIN_LOG::stop_union_events(THD *thd)
{
  assert(thd->binlog_evt_union.do_union);
  thd->binlog_evt_union.do_union= false;
}
2850
/**
  Tell whether a query belongs to the thread's current event union.

  @param thd             Thread whose union state is inspected.
  @param query_id_param  Query id to test.

  @return true if a union is active for thd and query_id_param is not
          smaller than the union's first query id.
*/
bool DRIZZLE_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
{
  return (thd->binlog_evt_union.do_union &&
          query_id_param >= thd->binlog_evt_union.first_query_id);
}
2858
These functions are placed in this file since they need access to
2859
binlog_hton, which has internal linkage.
2862
int THD::binlog_setup_trx_data()
2864
binlog_trx_data *trx_data=
2865
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2868
return(0); // Already set up
2870
trx_data= (binlog_trx_data*) my_malloc(sizeof(binlog_trx_data), MYF(MY_ZEROFILL));
2872
open_cached_file(&trx_data->trans_log, mysql_tmpdir,
2873
LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
2875
my_free((uchar*)trx_data, MYF(MY_ALLOW_ZERO_PTR));
2876
return(1); // Didn't manage to set it up
2878
thd_set_ha_data(this, binlog_hton, trx_data);
2880
trx_data= new (thd_get_ha_data(this, binlog_hton)) binlog_trx_data;
2886
Function to start a statement and optionally a transaction for the
2890
binlog_start_trans_and_stmt()
2894
This function does three things:
2895
- Start a transaction if not in autocommit mode or if a BEGIN
2896
statement has been seen.
2898
- Start a statement transaction to allow us to truncate the binary
2901
- Save the current binlog position so that we can roll back the
2902
statement by truncating the transaction log.
2904
We only update the saved position if the old one was undefined,
2905
the reason is that there are some cases (e.g., for CREATE-SELECT)
2906
where the position is saved twice (e.g., both in
2907
select_create::prepare() and THD::binlog_write_table_map()) , but
2908
we should use the first. This means that calls to this function
2909
can be used to start the statement before the first table map
2910
event, to include some extra events.
2914
THD::binlog_start_trans_and_stmt()
2916
binlog_trx_data *trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2918
if (trx_data == NULL ||
2919
trx_data->before_stmt_pos == MY_OFF_T_UNDEF)
2921
this->binlog_set_stmt_begin();
2922
if (options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
2923
trans_register_ha(this, true, binlog_hton);
2924
trans_register_ha(this, false, binlog_hton);
2926
Mark statement transaction as read/write. We never start
2927
a binary log transaction and keep it read-only,
2928
therefore it's best to mark the transaction read/write just
2929
at the same time we start it.
2930
Not necessary to mark the normal transaction read/write
2931
since the statement-level flag will be propagated automatically
2932
inside ha_commit_trans.
2934
ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
2939
void THD::binlog_set_stmt_begin() {
2940
binlog_trx_data *trx_data=
2941
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2944
The call to binlog_trans_log_savepos() might create the trx_data
2945
structure, if it didn't exist before, so we save the position
2946
into an auto variable and then write it into the transaction
2947
data for the binary log (i.e., trx_data).
2950
binlog_trans_log_savepos(this, &pos);
2951
trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2952
trx_data->before_stmt_pos= pos;
2957
Write a table map to the binary log.
2960
int THD::binlog_write_table_map(TABLE *table, bool is_trans)
2964
/* Pre-conditions */
2965
assert(current_stmt_binlog_row_based && mysql_bin_log.is_open());
2966
assert(table->s->table_map_id != ULONG_MAX);
2968
Table_map_log_event::flag_set const
2969
flags= Table_map_log_event::TM_NO_FLAGS;
2972
the_event(this, table, table->s->table_map_id, is_trans, flags);
2974
if (is_trans && binlog_table_maps == 0)
2975
binlog_start_trans_and_stmt();
2977
if ((error= mysql_bin_log.write(&the_event)))
2980
binlog_table_maps++;
2981
table->s->table_map_version= mysql_bin_log.table_map_version();
2986
THD::binlog_get_pending_rows_event() const
2988
binlog_trx_data *const trx_data=
2989
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
2991
This is less than ideal, but here's the story: If there is no
2992
trx_data, prepare_pending_rows_event() has never been called
2993
(since the trx_data is set up there). In that case, we just return
2996
return trx_data ? trx_data->pending() : NULL;
3000
THD::binlog_set_pending_rows_event(Rows_log_event* ev)
3002
if (thd_get_ha_data(this, binlog_hton) == NULL)
3003
binlog_setup_trx_data();
3005
binlog_trx_data *const trx_data=
3006
(binlog_trx_data*) thd_get_ha_data(this, binlog_hton);
3009
trx_data->set_pending(ev);
3014
Moves the last bunch of rows from the pending Rows event to the binlog
3015
(either cached binlog if transaction, or disk binlog). Sets a new pending
3019
DRIZZLE_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
3020
Rows_log_event* event)
3022
assert(mysql_bin_log.is_open());
3026
binlog_trx_data *const trx_data=
3027
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
3031
if (Rows_log_event* pending= trx_data->pending())
3033
IO_CACHE *file= &log_file;
3036
Decide if we should write to the log file directly or to the
3039
if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log))
3040
file= &trx_data->trans_log;
3043
If we are writing to the log file directly, we could avoid
3044
locking the log. This does not work since we need to step the
3045
m_table_map_version below, and that change has to be protected
3046
by the LOCK_log mutex.
3048
pthread_mutex_lock(&LOCK_log);
3051
Write pending event to log file or transaction cache
3053
if (pending->write(file))
3055
pthread_mutex_unlock(&LOCK_log);
3060
We step the table map version if we are writing an event
3061
representing the end of a statement. We do this regardless of
3062
whether we write to the transaction cache or directly to the
3065
In an ideal world, we could avoid stepping the table map version
3066
if we were writing to a transaction cache, since we could then
3067
reuse the table map that was written earlier in the transaction
3068
cache. This does not work since STMT_END_F implies closing all
3069
table mappings on the slave side.
3071
TODO: Find a solution so that table maps does not have to be
3072
written several times within a transaction.
3074
if (pending->get_flags(Rows_log_event::STMT_END_F))
3075
++m_table_map_version;
3079
if (file == &log_file)
3081
error= flush_and_sync();
3085
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3089
pthread_mutex_unlock(&LOCK_log);
3092
thd->binlog_set_pending_rows_event(event);
3098
Write an event to the binary log.
3101
bool DRIZZLE_BIN_LOG::write(Log_event *event_info)
3103
THD *thd= event_info->thd;
3106
if (thd->binlog_evt_union.do_union)
3109
In Stored function; Remember that function call caused an update.
3110
We will log the function call to the binary log on function exit
3112
thd->binlog_evt_union.unioned_events= true;
3113
thd->binlog_evt_union.unioned_events_trans |= event_info->cache_stmt;
3118
Flush the pending rows event to the transaction cache or to the
3119
log file. Since this function potentially acquires the LOCK_log
3120
mutex, we do this before acquiring the LOCK_log mutex in this
3123
We only end the statement if we are in a top-level statement. If
3124
we are inside a stored function, we do not end the statement since
3125
this will close all tables on the slave.
3127
bool const end_stmt= false;
3128
thd->binlog_flush_pending_rows_event(end_stmt);
3130
pthread_mutex_lock(&LOCK_log);
3133
In most cases this is only called if 'is_open()' is true; in fact this is
3134
mostly called if is_open() *was* true a few instructions before, but it
3135
could have changed since.
3137
if (likely(is_open()))
3139
IO_CACHE *file= &log_file;
3141
In the future we need to add to the following if tests like
3142
"do the involved tables match (to be implemented)
3143
binlog_[wild_]{do|ignore}_table?" (WL#1049)"
3145
const char *local_db= event_info->get_db();
3146
if ((thd && !(thd->options & OPTION_BIN_LOG)) ||
3147
(!binlog_filter->db_ok(local_db)))
3149
VOID(pthread_mutex_unlock(&LOCK_log));
3154
Should we write to the binlog cache or to the binlog on disk?
3155
Write to the binlog cache if:
3156
- it is already not empty (meaning we're in a transaction; note that the
3157
present event could be about a non-transactional table, but still we need
3158
to write to the binlog cache in that case to handle updates to mixed
3159
trans/non-trans table types the best possible in binlogging)
3160
- or if the event asks for it (cache_stmt == TRUE).
3162
if (opt_using_transactions && thd)
3164
if (thd->binlog_setup_trx_data())
3167
binlog_trx_data *const trx_data=
3168
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
3169
IO_CACHE *trans_log= &trx_data->trans_log;
3170
my_off_t trans_log_pos= my_b_tell(trans_log);
3171
if (event_info->get_cache_stmt() || trans_log_pos != 0)
3173
if (trans_log_pos == 0)
3174
thd->binlog_start_trans_and_stmt();
3178
TODO as Mats suggested, for all the cases above where we write to
3179
trans_log, it sounds unnecessary to lock LOCK_log. We should rather
3180
test first if we want to write to trans_log, and if not, lock
3186
No check for auto events flag here - this write method should
3187
never be called if auto-events are enabled
3191
1. Write first log events which describe the 'run environment'
3196
If row-based binlogging, Insert_id, Rand and other kind of "setting
3197
context" events are not needed.
3201
if (!thd->current_stmt_binlog_row_based)
3203
if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
3205
Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
3206
thd->first_successful_insert_id_in_prev_stmt_for_binlog);
3210
if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
3213
If the auto_increment was second in a table's index (possible with
3214
MyISAM or BDB) (table->next_number_keypart != 0), such event is
3215
in fact not necessary. We could avoid logging it.
3217
Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
3218
thd->auto_inc_intervals_in_cur_stmt_for_binlog.
3225
Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
3229
if (thd->user_var_events.elements)
3231
for (uint i= 0; i < thd->user_var_events.elements; i++)
3233
BINLOG_USER_VAR_EVENT *user_var_event;
3234
get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);
3235
User_var_log_event e(thd, user_var_event->user_var_event->name.str,
3236
user_var_event->user_var_event->name.length,
3237
user_var_event->value,
3238
user_var_event->length,
3239
user_var_event->type,
3240
user_var_event->charset_number);
3249
Write the SQL command
3252
if (event_info->write(file))
3255
if (file == &log_file) // we are writing to the real log (disk)
3257
if (flush_and_sync())
3260
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3267
if (my_errno == EFBIG)
3268
my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(0));
3270
my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
3275
if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
3276
++m_table_map_version;
3278
pthread_mutex_unlock(&LOCK_log);
3283
int error_log_print(enum loglevel level, const char *format,
3286
return logger.error_log_print(level, format, args);
3290
bool slow_log_print(THD *thd, const char *query, uint query_length,
3291
uint64_t current_utime)
3293
return logger.slow_log_print(thd, query, query_length, current_utime);
3297
bool LOGGER::log_command(THD *thd, enum enum_server_command command)
3300
Log command if we have at least one log event handler enabled and want
3301
to log this kind of commands
3303
if (*general_log_handler_list && (what_to_log & (1L << (uint) command)))
3305
if (thd->options & OPTION_LOG_OFF)
3318
bool general_log_print(THD *thd, enum enum_server_command command,
3319
const char *format, ...)
3324
/* Print the message to the buffer if we want to log this king of commands */
3325
if (! logger.log_command(thd, command))
3328
va_start(args, format);
3329
error= logger.general_log_print(thd, command, format, args);
3335
bool general_log_write(THD *thd, enum enum_server_command command,
3336
const char *query, uint query_length)
3338
/* Write the message to the log if we want to log this king of commands */
3339
if (logger.log_command(thd, command))
3340
return logger.general_log_write(thd, command, query, query_length);
3345
/**
  Rotate the log if needed and, after a rotation, purge expired logs.

  The log is rotated when RP_FORCE_ROTATE is set or when the current
  write position has reached max_size. When built with
  HAVE_REPLICATION and expire_logs_days is non-zero, a rotation is
  followed by purging the logs older than expire_logs_days days.

  @param flags  Bitmask:
                - RP_LOCK_LOG_IS_ALREADY_LOCKED : the caller holds
                  LOCK_log, so it is neither taken nor released here.
                - RP_FORCE_ROTATE : rotate regardless of the log size.
*/
void DRIZZLE_BIN_LOG::rotate_and_purge(uint flags)
{
  if (!(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED))
    pthread_mutex_lock(&LOCK_log);
  if ((flags & RP_FORCE_ROTATE) ||
      (my_b_tell(&log_file) >= (my_off_t) max_size))
  {
    new_file_without_locking();
#ifdef HAVE_REPLICATION
    if (expire_logs_days)
    {
      /* Cut-off time: now minus expire_logs_days, expressed in seconds */
      time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
      if (purge_time >= 0)
        purge_logs_before_date(purge_time);
    }
#endif
  }
  if (!(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED))
    pthread_mutex_unlock(&LOCK_log);
}
3366
uint DRIZZLE_BIN_LOG::next_file_id()
3369
pthread_mutex_lock(&LOCK_log);
3371
pthread_mutex_unlock(&LOCK_log);
3377
Write the contents of a cache to the binary log.
3381
cache Cache to write to the binary log
3382
lock_log True if the LOCK_log mutex should be acquired, false otherwise
3383
sync_log True if the log should be flushed and synced
3386
Write the contents of the cache to the binary log. The cache will
3387
be reset as a READ_CACHE to be able to read the contents from it.
3390
int DRIZZLE_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
3392
Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
3394
if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
3395
return ER_ERROR_ON_WRITE;
3396
uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
3398
uchar header[LOG_EVENT_HEADER_LEN];
3401
The events in the buffer have incorrect end_log_pos data
3402
(relative to beginning of group rather than absolute),
3403
so we'll recalculate them in situ so the binlog is always
3404
correct, even in the middle of a group. This is possible
3405
because we now know the start position of the group (the
3406
offset of this cache in the log, if you will); all we need
3407
to do is to find all event-headers, and add the position of
3408
the group to the end_log_pos of each event. This is pretty
3409
straight forward, except that we read the cache in segments,
3410
so an event-header might end up on the cache-border and get
3414
group= (uint)my_b_tell(&log_file);
3421
if we only got a partial header in the last iteration,
3422
get the other half now and process a full header.
3424
if (unlikely(carry > 0))
3426
assert(carry < LOG_EVENT_HEADER_LEN);
3428
/* assemble both halves */
3429
memcpy(&header[carry], cache->read_pos, LOG_EVENT_HEADER_LEN - carry);
3431
/* fix end_log_pos */
3432
val= uint4korr(&header[LOG_POS_OFFSET]) + group;
3433
int4store(&header[LOG_POS_OFFSET], val);
3435
/* write the first half of the split header */
3436
if (my_b_write(&log_file, header, carry))
3437
return ER_ERROR_ON_WRITE;
3440
copy fixed second half of header to cache so the correct
3441
version will be written later.
3443
memcpy(cache->read_pos, &header[carry], LOG_EVENT_HEADER_LEN - carry);
3445
/* next event header at ... */
3446
hdr_offs = uint4korr(&header[EVENT_LEN_OFFSET]) - carry;
3451
/* if there is anything to write, process it. */
3453
if (likely(length > 0))
3456
process all event-headers in this (partial) cache.
3457
if next header is beyond current read-buffer,
3458
we'll get it later (though not necessarily in the
3459
very next iteration, just "eventually").
3462
while (hdr_offs < length)
3465
partial header only? save what we can get, process once
3469
if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
3471
carry= length - hdr_offs;
3472
memcpy(header, cache->read_pos + hdr_offs, carry);
3477
/* we've got a full event-header, and it came in one piece */
3479
uchar *log_pos= (uchar *)cache->read_pos + hdr_offs + LOG_POS_OFFSET;
3481
/* fix end_log_pos */
3482
val= uint4korr(log_pos) + group;
3483
int4store(log_pos, val);
3485
/* next event header at ... */
3486
log_pos= (uchar *)cache->read_pos + hdr_offs + EVENT_LEN_OFFSET;
3487
hdr_offs += uint4korr(log_pos);
3493
Adjust hdr_offs. Note that it may still point beyond the segment
3494
read in the next iteration; if the current event is very long,
3495
it may take a couple of read-iterations (and subsequent adjustments
3496
of hdr_offs) for it to point into the then-current segment.
3497
If we have a split header (!carry), hdr_offs will be set at the
3498
beginning of the next iteration, overwriting the value we set here:
3503
/* Write data to the binary log file */
3504
if (my_b_write(&log_file, cache->read_pos, length))
3505
return ER_ERROR_ON_WRITE;
3506
cache->read_pos=cache->read_end; // Mark buffer used up
3507
} while ((length= my_b_fill(cache)));
3518
Write a cached log entry to the binary log.
3519
- To support transaction over replication, we wrap the transaction
3520
with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
3521
We want to write a BEGIN/ROLLBACK block when a non-transactional table
3522
was updated in a transaction which was rolled back. This is to ensure
3523
that the same updates are run on the slave.
3526
@param cache The cache to copy to the binlog
3527
@param commit_event The commit event to print after writing the
3528
contents of the cache.
3531
We only come here if there is something in the cache.
3533
The thing in the cache is always a complete transaction.
3535
'cache' needs to be reinitialized after this functions returns.
3538
bool DRIZZLE_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
3540
VOID(pthread_mutex_lock(&LOCK_log));
3542
/* NULL would represent nothing to replicate after ROLLBACK */
3543
assert(commit_event != NULL);
3546
if (likely(is_open())) // Should always be true
3549
We only bother to write to the binary log if there is anything
3552
if (my_b_tell(cache) > 0)
3555
Log "BEGIN" at the beginning of every transaction. Here, a
3556
transaction is either a BEGIN..COMMIT block or a single
3557
statement in autocommit mode.
3559
Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), true, false);
3561
Imagine this is rollback due to net timeout, after all
3562
statements of the transaction succeeded. Then we want a
3563
zero-error code in BEGIN. In other words, if there was a
3564
really serious error code it's already in the statement's
3565
events, there is no need to put it also in this internally
3566
generated event, and as this event is generated late it would
3567
lead to false alarms.
3569
This is safer than thd->clear_error() against kills at shutdown.
3571
qinfo.error_code= 0;
3573
Now this Query_log_event has artificial log_pos 0. It must be
3574
adjusted to reflect the real position in the log. Not doing it
3575
would confuse the slave: it would prevent this one from
3576
knowing where he is in the master's binlog, which would result
3577
in wrong positions being shown to the user, MASTER_POS_WAIT
3580
if (qinfo.write(&log_file))
3583
if ((write_error= write_cache(cache, false, false)))
3586
if (commit_event && commit_event->write(&log_file))
3588
if (flush_and_sync())
3590
if (cache->error) // Error on read
3592
sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
3593
write_error=1; // Don't give more errors
3600
if commit_event is Xid_log_event, increase the number of
3601
prepared_xids (it's decreased in ::unlog()). Binlog cannot be rotated
3602
if there're prepared xids in it - see the comment in new_file() for
3604
If the commit_event is not Xid_log_event (then it's a Query_log_event)
3605
rotate binlog, if necessary.
3607
if (commit_event && commit_event->get_type_code() == XID_EVENT)
3609
pthread_mutex_lock(&LOCK_prep_xids);
3611
pthread_mutex_unlock(&LOCK_prep_xids);
3614
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
3616
VOID(pthread_mutex_unlock(&LOCK_log));
3624
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
3626
VOID(pthread_mutex_unlock(&LOCK_log));
3632
Wait until we get a signal that the relay log has been updated
3634
@param[in] thd a THD struct
3636
LOCK_log must be taken before calling this function.
3637
It will be released at the end of the function.
3640
void DRIZZLE_BIN_LOG::wait_for_update_relay_log(THD* thd)
{
  const char *old_msg;
  /* Register the wait on update_cond/LOCK_log with the thread */
  old_msg= thd->enter_cond(&update_cond, &LOCK_log,
                           "Slave has read all relay log; "
                           "waiting for the slave I/O "
                           "thread to update it" );
  pthread_cond_wait(&update_cond, &LOCK_log);
  /* Leave the wait, handing back the message enter_cond() returned */
  thd->exit_cond(old_msg);
}
3654
Wait until we get a signal that the binary log has been updated.
3655
Applies to master only.
3658
@param[in] thd a THD struct
3659
@param[in] timeout a pointer to a timespec;
3660
NULL means to wait w/o timeout.
3661
@retval 0 if got signalled on update
3662
@retval non-0 if wait timeout elapsed
3664
LOCK_log must be taken before calling this function.
3665
LOCK_log is being released while the thread is waiting.
3666
LOCK_log is released by the caller.
3669
int DRIZZLE_BIN_LOG::wait_for_update_bin_log(THD* thd,
3670
const struct timespec *timeout)
3673
const char* old_msg = thd->proc_info;
3674
old_msg= thd->enter_cond(&update_cond, &LOCK_log,
3675
"Master has sent all binlog to slave; "
3676
"waiting for binlog to be updated");
3678
pthread_cond_wait(&update_cond, &LOCK_log);
3680
ret= pthread_cond_timedwait(&update_cond, &LOCK_log,
3681
const_cast<struct timespec *>(timeout));
3689
@param exiting Bitmask for one or more of the following bits:
3690
- LOG_CLOSE_INDEX : if we should close the index file
3691
- LOG_CLOSE_TO_BE_OPENED : if we intend to call open
3692
at once after close.
3693
- LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
3696
One can do an open on the object at once after doing a close.
3697
The internal structures are not freed until cleanup() is called
3700
void DRIZZLE_BIN_LOG::close(uint exiting)
3701
{ // One can't set log_type here!
3702
if (log_state == LOG_OPENED)
3704
#ifdef HAVE_REPLICATION
3705
if (log_type == LOG_BIN && !no_auto_events &&
3706
(exiting & LOG_CLOSE_STOP_EVENT))
3710
bytes_written+= s.data_written;
3713
#endif /* HAVE_REPLICATION */
3715
/* don't pwrite in a file opened with O_APPEND - it doesn't work */
3716
if (log_file.type == WRITE_CACHE && log_type == LOG_BIN)
3718
my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
3719
uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
3720
pwrite(log_file.file, &flags, 1, offset);
3723
/* this will cleanup IO_CACHE, sync and close the file */
3724
DRIZZLE_LOG::close(exiting);
3728
The following test is needed even if is_open() is not set, as we may have
3729
called a not complete close earlier and the index file is still open.
3732
if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
3734
end_io_cache(&index_file);
3735
if (my_close(index_file.file, MYF(0)) < 0 && ! write_error)
3738
sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name, errno);
3741
log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
3747
void DRIZZLE_BIN_LOG::set_max_size(ulong max_size_arg)
3750
We need to take locks, otherwise this may happen:
3751
new_file() is called, calls open(old_max_size), then before open() starts,
3752
set_max_size() sets max_size to max_size_arg, then open() starts and
3753
uses the old_max_size argument, so max_size_arg has been overwritten and
3754
it's like if the SET command was never run.
3756
pthread_mutex_lock(&LOCK_log);
3758
max_size= max_size_arg;
3759
pthread_mutex_unlock(&LOCK_log);
3765
Check if a string is a valid number.
3767
@param str String to test
3768
@param res Store value here
3769
@param allow_wildcards Set to 1 if we should ignore '%' and '_'
3772
For the moment the allow_wildcards argument is not used
3773
Should be moved to some other file.
3776
1 String is a number
3781
static bool test_if_number(register const char *str,
3782
long *res, bool allow_wildcards)
3789
while (*str++ == ' ') ;
3790
if (*--str == '-' || *str == '+')
3792
while (my_isdigit(files_charset_info,*str) ||
3793
(allow_wildcards && (*str == wild_many || *str == wild_one)))
3801
my_isdigit(files_charset_info,*str) ||
3802
(allow_wildcards && (*str == wild_many || *str == wild_one)) ;
3805
if (*str != 0 || flag == 0)
3809
return(1); /* Number ok */
3810
} /* test_if_number */
3813
void sql_perror(const char *message)
3815
#ifdef HAVE_STRERROR
3816
sql_print_error("%s: %s",message, strerror(errno));
3823
bool flush_error_log()
3828
char err_renamed[FN_REFLEN], *end;
3829
end= strmake(err_renamed,log_error_file,FN_REFLEN-4);
3830
stpcpy(end, "-old");
3831
VOID(pthread_mutex_lock(&LOCK_error_log));
3832
char err_temp[FN_REFLEN+4];
3834
On Windows, a temporary file is needed in order to rename
3835
the current error file.
3837
strxmov(err_temp, err_renamed,"-tmp",NullS);
3838
(void) my_delete(err_temp, MYF(0));
3839
if (freopen(err_temp,"a+",stdout))
3845
freopen(err_temp,"a+",stderr);
3846
(void) my_delete(err_renamed, MYF(0));
3847
my_rename(log_error_file,err_renamed,MYF(0));
3848
if (freopen(log_error_file,"a+",stdout))
3849
freopen(log_error_file,"a+",stderr);
3851
if ((fd = my_open(err_temp, O_RDONLY, MYF(0))) >= 0)
3853
while ((bytes= my_read(fd, buf, IO_SIZE, MYF(0))) &&
3854
bytes != MY_FILE_ERROR)
3855
my_fwrite(stderr, buf, bytes, MYF(0));
3856
my_close(fd, MYF(0));
3858
(void) my_delete(err_temp, MYF(0));
3862
VOID(pthread_mutex_unlock(&LOCK_error_log));
3867
void DRIZZLE_BIN_LOG::signal_update()
3869
pthread_cond_broadcast(&update_cond);
3874
Prints a printf style message to the error log and, under NT, to the
3877
This function prints the message into a buffer and then sends that buffer
3878
to other functions to write that message to other logging sources.
3880
@param event_type Type of event to write (Error, Warning, or Info)
3881
@param format Printf style format of message
3882
@param args va_list list of arguments for the message
3885
The function always returns 0. The return value is present in the
3886
signature to be compatible with other logging routines, which could
3887
return an error (e.g. logging to the log tables)
3889
static void print_buffer_to_file(enum loglevel level,
3890
int error_code __attribute__((unused)),
3892
size_t buffer_length __attribute__((unused)))
3898
VOID(pthread_mutex_lock(&LOCK_error_log));
3901
localtime_r(&skr, &tm_tmp);
3904
fprintf(stderr, "%02d%02d%02d %2d:%02d:%02d [%s] %s\n",
3905
start->tm_year % 100,
3911
(level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
3912
"Warning" : "Note"),
3917
VOID(pthread_mutex_unlock(&LOCK_error_log));
3922
int vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
3926
int error_code= errno;
3928
length= vsnprintf(buff, sizeof(buff), format, args);
3930
print_buffer_to_file(level, error_code, buff, length);
3936
void sql_print_error(const char *format, ...)
3940
va_start(args, format);
3941
error_log_print(ERROR_LEVEL, format, args);
3948
void sql_print_warning(const char *format, ...)
3952
va_start(args, format);
3953
error_log_print(WARNING_LEVEL, format, args);
3960
void sql_print_information(const char *format, ...)
3964
va_start(args, format);
3965
error_log_print(INFORMATION_LEVEL, format, args);
3972
/********* transaction coordinator log for 2pc - mmap() based solution *******/
3975
the log consists of a file, mmapped to a memory.
3976
file is divided on pages of tc_log_page_size size.
3977
(usable size of the first page is smaller because of log header)
3978
there's PAGE control structure for each page
3979
each page (or rather PAGE control structure) can be in one of three
3980
states - active, syncing, pool.
3981
there could be only one page in active or syncing states,
3982
but many in pool - pool is fifo queue.
3983
usual lifecycle of a page is pool->active->syncing->pool
3984
"active" page - is a page where new xid's are logged.
3985
the page stays active as long as syncing slot is taken.
3986
"syncing" page is being synced to disk. no new xid can be added to it.
3987
when the sync is done the page is moved to a pool and an active page
3990
the result of such an architecture is a natural "commit grouping" -
3991
If commits are coming faster than the system can sync, they do not
3992
stall. Instead, all commit that came since the last sync are
3993
logged to the same page, and they all are synced with the next -
3994
one - sync. Thus, though individual commits are delayed, throughput
3997
when a xid is added to an active page, the thread of this xid waits
3998
for a page's condition until the page is synced. when syncing slot
3999
becomes vacant one of these waiters is awakened to take care of syncing.
4000
it syncs the page and signals all waiters that the page is synced.
4001
PAGE::waiters is used to count these waiters, and a page may never
4002
become active again until waiters==0 (that is all waiters from the
4003
previous sync have noticed the sync was completed)
4005
note, that the page becomes "dirty" and has to be synced only when a
4006
new xid is added into it. Removing a xid from a page does not make it
4007
dirty - we don't sync removals to disk.
4010
ulong tc_log_page_waits= 0;
4014
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)
4016
static const char tc_log_magic[]={(char) 254, 0x23, 0x05, 0x74};
4018
ulong opt_tc_log_size= TC_LOG_MIN_SIZE;
4019
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
4021
/*
  Open the memory-mapped transaction coordinator log.

  Visible steps: expand opt_name into logname, try my_open() on it, with a
  my_create() + ftruncate() path sized to opt_tc_log_size; mmap the whole
  file; allocate and initialise one PAGE control structure per page; write
  the magic header; initialise the object's mutexes and conditions.

  NOTE(review): braces, error exits, the place where `crashed` is set and
  the final return are not visible in this view; the comments below
  describe only the statements that are.

  @param opt_name  log file name; asserted to be non-empty
*/
int TC_LOG_MMAP::open(const char *opt_name)
4024
bool crashed= false;
4027
assert(total_ha_2pc > 1);
4028
assert(opt_name && opt_name[0]);
4030
// log pages are OS pages; TC_LOG_PAGE_SIZE must be a multiple of that size
tc_log_page_size= getpagesize();
4031
assert(TC_LOG_PAGE_SIZE % tc_log_page_size == 0);
4033
fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
4034
// try an existing log file first
if ((fd= my_open(logname, O_RDWR, MYF(0))) < 0)
4036
// distinguishes "file does not exist" from any other open failure
if (my_errno != ENOENT)
4038
if (using_heuristic_recover())
4040
if ((fd= my_create(logname, CREATE_MODE, O_RDWR, MYF(MY_WME))) < 0)
4043
// a freshly created file is sized to the configured log size
file_length= opt_tc_log_size;
4044
if (ftruncate(fd, file_length))
4051
sql_print_information("Recovering after a crash using %s", opt_name);
4052
// automatic recovery is refused when heuristic recovery was requested
if (tc_heuristic_recover)
4054
sql_print_error("Cannot perform automatic crash recovery when "
4055
"--tc-heuristic-recover is used");
4058
// length of the existing file; it must be a whole number of pages
file_length= my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
4059
if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
4063
// map the whole file read/write and shared
data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
4064
MAP_NOSYNC|MAP_SHARED, fd, 0);
4065
if (data == MAP_FAILED)
4072
// NOTE(review): the cast applies to file_length alone, so the length is
// narrowed to uint before the division - confirm this is intended.
npages=(uint)file_length/tc_log_page_size;
4073
assert(npages >= 3); // to guarantee non-empty pool
4074
// zero-filled array of per-page control structures
if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
4077
for (pg=pages, i=0; i < npages; i++, pg++)
4082
pthread_mutex_init(&pg->lock, MY_MUTEX_INIT_FAST);
4083
pthread_cond_init (&pg->cond, 0);
4084
// first xid slot of page i inside the mapping
pg->start=(my_xid *)(data + i*tc_log_page_size);
4086
// NOTE(review): pg->start is a my_xid*, so this adds tc_log_page_size
// *elements*, not bytes, while pg->size below is
// tc_log_page_size/sizeof(my_xid) slots - confirm which end is intended
// (pages[0].start further down is derived from pages[0].end).
pg->end=(my_xid *)(pg->start + tc_log_page_size);
4087
pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
4089
// the first page also holds the log header, so it has fewer xid slots;
// its start is moved so that [start, end) covers exactly `size` slots
pages[0].size=pages[0].free=
4090
(tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
4091
pages[0].start=pages[0].end-pages[0].size;
4092
// the last page terminates the page list
pages[npages-1].next=0;
4095
if (crashed && recover())
4098
// (re)write the header: magic signature followed by the number of
// two-phase-commit capable engines, then flush it to disk
memcpy(data, tc_log_magic, sizeof(tc_log_magic));
4099
data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
4100
msync(data, tc_log_page_size, MS_SYNC);
4101
my_sync(fd, MYF(0));
4104
pthread_mutex_init(&LOCK_sync, MY_MUTEX_INIT_FAST);
4105
pthread_mutex_init(&LOCK_active, MY_MUTEX_INIT_FAST);
4106
pthread_mutex_init(&LOCK_pool, MY_MUTEX_INIT_FAST);
4107
pthread_cond_init(&COND_active, 0);
4108
pthread_cond_init(&COND_pool, 0);
4115
// the pool's tail is the last page of the array
pool_last=pages+npages-1;
4125
there is no active page, let's get one from the pool.
4127
Two strategies here:
4128
-# take the first from the pool
4129
-# if there're waiters - take the one with the most free space.
4132
TODO page merging. try to allocate adjacent page first,
4133
so that they can be flushed both in one sync
4136
/*
  Choose a page from the pool, working under LOCK_pool.

  The first pool page is taken when it has no waiters; otherwise the rest
  of the pool is scanned for the waiter-free page with the most free
  slots. The selection is retried, via overflow(), while no usable page
  was found.

  NOTE(review): the loop header, the assignments to best_p/active and the
  pool_last handling are not visible in this view.
*/
void TC_LOG_MMAP::get_active_from_pool()
4138
PAGE **p, **best_p=0;
4142
pthread_mutex_lock(&LOCK_pool);
4147
if ((*p)->waiters == 0) // can the first page be used ?
4148
break; // yes - take it.
4150
best_free=0; // no - trying second strategy
4151
// walk the remaining pool entries through their `next` links
for (p=&(*p)->next; *p; p=&(*p)->next)
4153
if ((*p)->waiters == 0 && (*p)->free > best_free)
4155
best_free=(*p)->free;
4160
// nothing usable: overflow() blocks on COND_pool and always returns 1,
// so the selection is tried again afterwards
while ((*best_p == 0 || best_free == 0) && overflow());
4163
if (active->free == active->size) // we've chosen an empty page
4165
// page usage statistics: current count and its high-water mark
tc_log_cur_pages_used++;
4166
set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
4169
if ((*best_p)->next) // unlink the page from the pool
4170
*best_p=(*best_p)->next;
4175
pthread_mutex_unlock(&LOCK_pool);
4180
perhaps, increase log size ?
4182
/*
  Handle "no usable page in the pool": count the event in
  tc_log_page_waits and block on COND_pool.

  pthread_cond_wait() is given LOCK_pool, so the caller must hold that
  mutex (get_active_from_pool() calls this with LOCK_pool locked).

  @return always 1
*/
int TC_LOG_MMAP::overflow()
4185
simple overflow handling - just wait
4186
TODO perhaps, increase log size ?
4187
let's check the behaviour of tc_log_page_waits first
4189
tc_log_page_waits++;
4190
pthread_cond_wait(&COND_pool, &LOCK_pool);
4191
return 1; // always return 1
4195
Record that transaction XID is committed on the persistent storage.
4197
This function is called in the middle of two-phase commit:
4198
First all resources prepare the transaction, then tc_log->log() is called,
4199
then all resources commit the transaction, then tc_log->unlog() is called.
4201
All access to active page is serialized but it's not a problem, as
4202
we're assuming that fsync() will be a main bottleneck.
4203
That is, parallelizing writes to log pages we'll decrease number of
4204
threads waiting for a page, but then all these threads will be waiting
4205
for a fsync() anyway
4207
If tc_log == DRIZZLE_LOG then tc_log writes transaction to binlog and
4208
records XID in a special Xid_log_event.
4209
If tc_log == TC_LOG_MMAP then xid is written in a special memory-mapped
4215
\# - otherwise, "cookie", a number that will be passed as an argument
4216
to unlog() call. tc_log can define it any way it wants,
4217
and use for whatever purposes. TC_LOG_MMAP sets it
4218
to the position in memory where xid was logged to.
4221
/*
  Log a xid into the active page and wait until that page is synced.

  Visible flow: under LOCK_active, wait while the active page is full and
  fetch a page from the pool when needed; under the page's own lock,
  compute the cookie for the chosen slot; then switch to LOCK_sync and
  either wait for a sync done by another thread or take the `syncing`
  slot for this page.

  NOTE(review): the local declarations, the slot search, the store of the
  xid, the call that performs the sync and the `done` label are not
  visible in this view.

  @param thd  unused
  @param xid  transaction id to record
  @return 0 on error, otherwise the cookie: the byte offset of the slot
          from the start of the mapping (`data`)
*/
int TC_LOG_MMAP::log_xid(THD *thd __attribute__((unused)), my_xid xid)
4227
pthread_mutex_lock(&LOCK_active);
4230
if active page is full - just wait...
4231
frankly speaking, active->free here accessed outside of mutex
4232
protection, but it's safe, because it only means we may miss an
4233
unlog() for the active page, and we're not waiting for it here -
4234
unlog() does not signal COND_active.
4236
while (unlikely(active && active->free == 0))
4237
pthread_cond_wait(&COND_active, &LOCK_active);
4239
/* no active page ? take one from the pool */
4241
get_active_from_pool();
4244
pthread_mutex_lock(&p->lock);
4246
/* searching for an empty slot */
4250
assert(p->ptr < p->end); // because p->free > 0
4253
/* found! store xid there and mark the page dirty */
4254
cookie= (ulong)((uchar *)p->ptr - data); // can never be zero
4259
/* to sync or not to sync - this is the question */
// lock hand-over: LOCK_active is released, LOCK_sync is taken, and only
// then is the page lock released
4260
pthread_mutex_unlock(&LOCK_active);
4261
pthread_mutex_lock(&LOCK_sync);
4262
pthread_mutex_unlock(&p->lock);
4265
{ // somebody's syncing. let's wait
4268
note - it must be while (), not do ... while () here
4269
as p->state may be not DIRTY when we come here
4271
while (p->state == DIRTY && syncing)
4272
pthread_cond_wait(&p->cond, &LOCK_sync);
4274
// the sync result is reported through the page state
err= p->state == ERROR;
4275
if (p->state != DIRTY) // page was synced
4277
if (p->waiters == 0)
4278
pthread_cond_signal(&COND_pool); // in case somebody's waiting
4279
pthread_mutex_unlock(&LOCK_sync);
4280
goto done; // we're done
4282
} // page was not synced! do it now
4283
assert(active == p && syncing == 0);
4284
pthread_mutex_lock(&LOCK_active);
4285
syncing=p; // place is vacant - take it
4286
active=0; // page is not active anymore
4287
pthread_cond_broadcast(&COND_active); // in case somebody's waiting
4288
pthread_mutex_unlock(&LOCK_active);
4289
pthread_mutex_unlock(&LOCK_sync);
4293
return err ? 0 : cookie;
4296
/*
  Flush the `syncing` page to disk, append it to the pool and wake the
  threads waiting on it.

  The page's state becomes ERROR when `err` is non-zero and POOL
  otherwise; waiters on the page's condition are woken with a broadcast
  and COND_pool is signalled.

  NOTE(review): the local declarations, any statement between the two
  flush calls, the update of pool_last, the reset of `syncing` and the
  return statement are not visible in this view.
*/
int TC_LOG_MMAP::sync()
4300
assert(syncing != active);
4303
sit down and relax - this can take a while...
4304
note - no locks are held at this point
4306
// NOTE(review): the length passed to msync is 1 byte starting at the
// page's first xid slot - presumably relying on msync working on whole
// pages; confirm.
err= msync(syncing->start, 1, MS_SYNC);
4308
// NOTE(review): as visible here, this assignment overwrites the msync
// result - confirm whether a guard on `err` precedes it.
err= my_sync(fd, MYF(0));
4310
/* page is synced. let's move it to the pool */
4311
pthread_mutex_lock(&LOCK_pool);
4312
pool_last->next=syncing;
4315
syncing->state= err ? ERROR : POOL;
4316
pthread_cond_broadcast(&syncing->cond); // signal "sync done"
4317
pthread_cond_signal(&COND_pool); // in case somebody's waiting
4318
pthread_mutex_unlock(&LOCK_pool);
4320
/* marking 'syncing' slot free */
4321
pthread_mutex_lock(&LOCK_sync);
4323
// NOTE(review): `active` is dereferenced without a visible null check,
// while log_xid() sets active=0 when a page moves to the syncing slot -
// confirm it cannot be null here.
pthread_cond_signal(&active->cond); // wake up a new syncer
4324
pthread_mutex_unlock(&LOCK_sync);
4329
erase xid from the page, update page free space counters/pointers.
4330
cookie points directly to the memory where xid was logged.
4333
/*
  Release the slot identified by cookie.

  The cookie is a byte offset into the mapping: dividing it by the page
  size selects the PAGE, adding it to `data` gives the slot address.
  Under the page lock the page's `ptr` is lowered to the slot if the slot
  is below it, the pages-in-use statistic is decremented when the page is
  completely free, and COND_pool is signalled when the page has no
  waiters.

  NOTE(review): the statements that clear the slot and update p->free are
  not visible in this view.

  @param cookie  value returned by log_xid()
  @param xid     unused
*/
void TC_LOG_MMAP::unlog(ulong cookie, my_xid xid __attribute__((unused)))
4335
PAGE *p=pages+(cookie/tc_log_page_size);
4336
my_xid *x=(my_xid *)(data+cookie);
4339
// the slot must lie inside the page selected above
assert(x >= p->start && x < p->end);
4342
pthread_mutex_lock(&p->lock);
4344
assert(p->free <= p->size);
4345
// keep p->ptr at or below the slot just released
set_if_smaller(p->ptr, x);
4346
if (p->free == p->size) // the page is completely empty
4347
statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
4348
if (p->waiters == 0) // the page is in pool and ready to rock
4349
pthread_cond_signal(&COND_pool); // ping ... for overflow()
4350
pthread_mutex_unlock(&p->lock);
4353
/*
  Tear down what open() set up: destroy the object's mutexes and
  COND_pool, destroy the per-page mutexes and conditions, free the PAGE
  array, unmap and close the file, and delete it when inited >= 5.

  NOTE(review): the control structure around these statements is not
  visible in this view (an existing comment below mentions a switch), so
  which of them run for a given `inited` value cannot be told from here.
  NOTE(review): no pthread_cond_destroy() for COND_active is visible
  although open() initialises it - confirm.
*/
void TC_LOG_MMAP::close()
4358
pthread_mutex_destroy(&LOCK_sync);
4359
pthread_mutex_destroy(&LOCK_active);
4360
pthread_mutex_destroy(&LOCK_pool);
4361
pthread_cond_destroy(&COND_pool);
4363
data[0]='A'; // garble the first (signature) byte, in case my_delete fails
4365
for (i=0; i < npages; i++)
4367
if (pages[i].ptr == 0)
4369
pthread_mutex_destroy(&pages[i].lock);
4370
pthread_cond_destroy(&pages[i].cond);
4373
my_free((uchar*)pages, MYF(0));
4375
my_munmap((char*)data, (size_t)file_length);
4377
my_close(fd, MYF(0));
4379
if (inited>=5) // cannot do in the switch because of Windows
4380
my_delete(logname, MYF(MY_WME));
4384
/*
  Crash recovery from the mapped log.

  Visible steps: verify the magic signature; verify that the engine count
  stored after the signature equals total_ha_2pc; collect every non-zero
  xid slot of every page into the `xids` hash; pass the hash to
  ha_recover(); zero the whole mapping.

  NOTE(review): braces, error exits and return statements are not visible
  in this view.
*/
int TC_LOG_MMAP::recover()
4387
PAGE *p=pages, *end_p=pages+npages;
4389
// the file must start with the signature written by open()
if (memcmp(data, tc_log_magic, sizeof(tc_log_magic)))
4391
sql_print_error("Bad magic header in tc log");
4396
the first byte after magic signature is set to current
4397
number of storage engines on startup
4399
if (data[sizeof(tc_log_magic)] != total_ha_2pc)
4401
// the count printed is the one stored in the log, not the current one
sql_print_error("Recovery failed! You must enable "
4402
"exactly %d storage engines that support "
4403
"two-phase commit protocol",
4404
data[sizeof(tc_log_magic)]);
4408
// hash of xids, keyed by the my_xid value itself (key length sizeof(my_xid))
if (hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
4409
sizeof(my_xid), 0, 0, MYF(0)))
4412
for ( ; p < end_p ; p++)
4414
// every non-zero slot in [start, end) is a logged xid
for (my_xid *x=p->start; x < p->end; x++)
4415
if (*x && my_hash_insert(&xids, (uchar *)x))
4419
if (ha_recover(&xids))
4423
// wipe the whole log
memset(data, 0, (size_t)file_length);
4429
sql_print_error("Crash recovery failed. Either correct the problem "
4430
"(if it's, for example, out of memory error) and restart, "
4431
"or delete tc log and start mysqld with "
4432
"--tc-heuristic-recover={commit|rollback}");
4438
// File-scope instances of the TC_LOG_DUMMY and TC_LOG_MMAP transaction
// coordinator log classes.
TC_LOG_DUMMY tc_log_dummy;
4439
TC_LOG_MMAP tc_log_mmap;
4442
Perform heuristic recovery, if --tc-heuristic-recover was used.
4445
no matter whether heuristic recovery was successful or not
4446
mysqld must exit. So, return value is the same in both cases.
4449
0 no heuristic recovery was requested
4451
1 heuristic recovery was performed
4454
/*
  Entry point for heuristic recovery, driven by tc_heuristic_recover.

  Visible behaviour: checks tc_heuristic_recover, announces heuristic
  recovery mode, and has messages for a failed recovery and for asking the
  user to restart without the option.

  NOTE(review): the return statements and the recovery call itself are
  not visible in this view.
*/
int TC_LOG::using_heuristic_recover()
4456
if (!tc_heuristic_recover)
4459
sql_print_information("Heuristic crash recovery mode");
4461
sql_print_error("Heuristic crash recovery failed");
4462
sql_print_information("Please restart mysqld without --tc-heuristic-recover");
4466
/****** transaction coordinator log for 2pc - binlog() based solution ******/
4467
// TC_LOG_BINLOG is an alias for DRIZZLE_BIN_LOG, so the TC_LOG_BINLOG::
// methods defined below are members of DRIZZLE_BIN_LOG.
#define TC_LOG_BINLOG DRIZZLE_BIN_LOG
4471
keep in-memory list of prepared transactions
4472
(add to list in log(), remove on unlog())
4473
and copy it to the new binlog if rotated
4474
but let's check the behaviour of tc_log_page_waits first!
4477
/*
  Open the binary log in its role as transaction coordinator log.

  Visible steps: initialise LOCK_prep_xids / COND_prep_xids; require an
  initialised index file; on heuristic recovery open a fresh binlog;
  walk the index with find_log_pos() / find_next_log(), keeping the name
  of each entry visited in log_name; open that file, and run recover()
  when its first event is a format description event with
  LOG_EVENT_BINLOG_IN_USE_F set.

  NOTE(review): braces, the `do` line of the loop, error exits and return
  statements are not visible in this view.

  @param opt_name  log name; asserted to be non-empty
*/
int TC_LOG_BINLOG::open(const char *opt_name)
4482
assert(total_ha_2pc > 1);
4483
assert(opt_name && opt_name[0]);
4485
pthread_mutex_init(&LOCK_prep_xids, MY_MUTEX_INIT_FAST);
4486
pthread_cond_init (&COND_prep_xids, 0);
4488
if (!my_b_inited(&index_file))
4490
/* There was a failure to open the index file, can't open the binlog */
4495
if (using_heuristic_recover())
4497
/* generate a new binlog to mask a corrupted one */
4498
open(opt_name, LOG_BIN, 0, WRITE_CACHE, 0, max_binlog_size, 0);
4503
// position on the first entry of the index
if ((error= find_log_pos(&log_info, NullS, 1)))
4505
// LOG_INFO_EOF is not reported as an error
if (error != LOG_INFO_EOF)
4506
sql_print_error("find_log_pos() failed (error: %d)", error);
4517
Format_description_log_event fdle(BINLOG_VERSION);
4518
char log_name[FN_REFLEN];
4520
if (! fdle.is_valid())
4525
// remember the entry just visited; once the loop ends log_name holds
// the last one
strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
4526
} while (!(error= find_next_log(&log_info, 1)));
4528
if (error != LOG_INFO_EOF)
4530
// NOTE(review): the error tested here comes from find_next_log(), yet
// the message names find_log_pos() - confirm the wording.
sql_print_error("find_log_pos() failed (error: %d)", error);
4534
if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
4536
sql_print_error("%s", errmsg);
4540
// recovery is needed when the first event is a format description event
// still flagged "in use"
if ((ev= Log_event::read_log_event(&log, 0, &fdle)) &&
4541
ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
4542
ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
4544
sql_print_information("Recovering after a crash using %s", opt_name);
4545
error= recover(&log, (Format_description_log_event *)ev);
4552
my_close(file, MYF(MY_WME));
4562
/** This is called on shutdown, after ha_panic. */
// Asserts that no prepared xids remain, then destroys the mutex and the
// condition that TC_LOG_BINLOG::open() initialised.
4563
void TC_LOG_BINLOG::close()
4565
assert(prepared_xids==0);
4566
pthread_mutex_destroy(&LOCK_prep_xids);
4567
pthread_cond_destroy (&COND_prep_xids);
4579
/*
  Record the commit of xid by ending the binlog transaction with an
  Xid_log_event.

  @param thd  session whose binlog transaction data is fetched with
              thd_get_ha_data(thd, binlog_hton)
  @param xid  transaction id placed in the Xid_log_event
  @return the logical negation of binlog_end_trans()'s result
*/
int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
4581
Xid_log_event xle(thd, xid);
4582
binlog_trx_data *trx_data=
4583
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
4585
We always commit the entire transaction when writing an XID. Also
4586
note that the return value is inverted.
4588
return(!binlog_end_trans(thd, trx_data, &xle, true));
4591
/*
  Note that one prepared xid is finished.

  Under LOCK_prep_xids the prepared_xids counter is decremented;
  COND_prep_xids is signalled when it reaches zero. After the mutex is
  released, rotate_and_purge(0) is called.

  @param cookie  unused
  @param xid     unused
*/
void TC_LOG_BINLOG::unlog(ulong cookie __attribute__((unused)),
4592
my_xid xid __attribute__((unused)))
4594
pthread_mutex_lock(&LOCK_prep_xids);
4595
assert(prepared_xids > 0);
4596
if (--prepared_xids == 0) {
4597
pthread_cond_signal(&COND_prep_xids);
4599
pthread_mutex_unlock(&LOCK_prep_xids);
4600
rotate_and_purge(0); // as ::write() did not rotate
4603
/*
  Crash recovery from a binary log.

  Visible steps: validate fdle and initialise the `xids` hash; clear
  LOG_EVENT_BINLOG_IN_USE_F in fdle->flags; read events while they are
  valid, copying the xid of every XID_EVENT into mem_root and inserting
  the copy into the hash; pass the hash to ha_recover(); release mem_root.

  NOTE(review): braces, local declarations, the continuation of the
  memdup_root() call, error exits and return statements are not visible
  in this view.

  @param log   IO_CACHE the events are read from
  @param fdle  format description event used when reading events
*/
int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
4609
if (! fdle->is_valid() ||
4610
hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
4611
sizeof(my_xid), 0, 0, MYF(0)))
4614
// memory root that holds the copied xids
init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE);
4616
fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
4618
while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid())
4620
if (ev->get_type_code() == XID_EVENT)
4622
Xid_log_event *xev=(Xid_log_event *)ev;
4623
// the hash stores a copy of the xid taken from mem_root
uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
4627
my_hash_insert(&xids, x);
4632
if (ha_recover(&xids))
4635
free_root(&mem_root, MYF(0));
4640
free_root(&mem_root, MYF(0));
4643
sql_print_error("Crash recovery failed. Either correct the problem "
4644
"(if it's, for example, out of memory error) and restart, "
4645
"or delete (or rename) binary log and start mysqld with "
4646
"--tc-heuristic-recover={commit|rollback}");
4651
#ifdef INNODB_COMPATIBILITY_HOOKS
4653
Get the file name of the MySQL binlog.
4654
@return the name of the binlog file
4657
// Returns mysql_bin_log.get_log_fname(); only compiled when
// INNODB_COMPATIBILITY_HOOKS is defined.
const char* mysql_bin_log_file_name(void)
4659
return mysql_bin_log.get_log_fname();
4662
Get the current position of the MySQL binlog.
4663
@return byte offset from the beginning of the binlog
4666
// Returns the pos_in_file member of mysql_bin_log's log file object,
// converted to uint64_t; only compiled when INNODB_COMPATIBILITY_HOOKS
// is defined.
uint64_t mysql_bin_log_file_pos(void)
4668
return (uint64_t) mysql_bin_log.get_log_file()->pos_in_file;
4670
#endif /* INNODB_COMPATIBILITY_HOOKS */
4673
// Plugin descriptor for "binlog": declared as a storage-engine plugin
// with binlog_init as its init hook; the deinit hook, status variables,
// system variables and config options are all NULL.
// NOTE(review): several descriptor fields are not visible in this view.
mysql_declare_plugin(binlog)
4675
DRIZZLE_STORAGE_ENGINE_PLUGIN,
4679
"This is a pseudo storage engine to represent the binlog in a transaction",
4681
binlog_init, /* Plugin Init */
4682
NULL, /* Plugin Deinit */
4683
NULL, /* status variables */
4684
NULL, /* system variables */
4685
NULL /* config options */
4687
mysql_declare_plugin_end;