1
/* Copyright (C) 2000-2003 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
24
Abort logging when we get an error in reading or writing log files
27
#include <drizzled/server_includes.h>
28
#include <drizzled/replication/replication.h>
29
#include <libdrizzle/libdrizzle.h>
30
#include <drizzled/replicator.h>
31
#include <mysys/hash.h>
32
#include <drizzled/replication/rli.h>
34
#include <mysys/my_dir.h>
37
#include <drizzled/plugin.h>
38
#include <drizzled/error.h>
39
#include <drizzled/gettext.h>
40
#include <drizzled/data_home.h>
41
#include <drizzled/log_event.h>
43
#include <drizzled/errmsg.h>
45
/* max size of the log message */
46
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
48
DRIZZLE_BIN_LOG drizzle_bin_log;
49
uint64_t sync_binlog_counter= 0; /* We should rationalize the largest possible counters for binlog sync */
51
static bool test_if_number(const char *str,
52
long *res, bool allow_wildcards);
53
static int binlog_init(void *p);
54
static int binlog_close_connection(handlerton *hton, Session *session);
55
static int binlog_savepoint_set(handlerton *hton, Session *session, void *sv);
56
static int binlog_savepoint_rollback(handlerton *hton, Session *session, void *sv);
57
static int binlog_commit(handlerton *hton, Session *session, bool all);
58
static int binlog_rollback(handlerton *hton, Session *session, bool all);
59
static int binlog_prepare(handlerton *hton, Session *session, bool all);
62
sql_print_message_func sql_print_message_handlers[3] =
64
sql_print_information,
70
char *make_default_log_name(char *buff,const char *log_ext)
72
strncpy(buff, pidfile_name, FN_REFLEN-5);
73
return fn_format(buff, buff, drizzle_data_home, log_ext,
74
MYF(MY_UNPACK_FILENAME|MY_REPLACE_EXT));
78
Helper class to hold a mutex for the duration of the
81
Eliminates the need for explicit unlocking of mutexes on, e.g.,
82
error returns. On passing a null pointer, the sentry will not do
88
Mutex_sentry(pthread_mutex_t *mutex)
92
pthread_mutex_lock(mutex);
98
pthread_mutex_unlock(m_mutex);
103
pthread_mutex_t *m_mutex;
105
// It's not allowed to copy this object in any way
106
Mutex_sentry(Mutex_sentry const&);
107
void operator=(Mutex_sentry const&);
110
handlerton *binlog_hton;
114
Save position of binary log transaction cache.
117
binlog_trans_log_savepos()
119
session The thread to take the binlog data from
120
pos Pointer to variable where the position will be stored
124
Save the current position in the binary log transaction cache into
125
the variable pointed to by 'pos'
129
binlog_trans_log_savepos(Session *, my_off_t *pos)
138
this function is mostly a placeholder.
139
conceptually, binlog initialization (now mostly done in DRIZZLE_BIN_LOG::open)
140
should be moved here.
143
/*
  Initializer for the binlog handlerton.
  Remembers the handlerton passed in through p in the global binlog_hton
  and installs this file's binlog_* callbacks on it.
*/
int binlog_init(void *p)
145
binlog_hton= (handlerton *)p;
146
/* Reported as enabled only when opt_bin_log is set */
binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
147
/* binlog_savepoint_set() treats its 'sv' argument as a my_off_t */
binlog_hton->savepoint_offset= sizeof(my_off_t);
148
binlog_hton->close_connection= binlog_close_connection;
149
binlog_hton->savepoint_set= binlog_savepoint_set;
150
binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
151
binlog_hton->commit= binlog_commit;
152
binlog_hton->rollback= binlog_rollback;
153
binlog_hton->prepare= binlog_prepare;
154
binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
159
static int binlog_close_connection(handlerton *, Session *)
165
static int binlog_prepare(handlerton *, Session *session, bool)
169
just pretend we can do 2pc, so that MySQL won't
171
real work will be done in DRIZZLE_BIN_LOG::log_xid()
174
(void)replicator_prepare(session);
180
This function is called once after each statement.
182
It has the responsibility to flush the transaction cache to the
183
binlog file on commits.
185
@param hton The binlog handlerton.
186
@param session The client thread that executes the transaction.
187
@param all This is @c true if this is a real transaction commit, and
190
@see handlerton::commit
192
static int binlog_commit(handlerton *, Session *session, bool all)
195
Decision table for committing a transaction. The top part, the
196
*conditions* represent different cases that can occur, and the
197
bottom part, the *actions*, represent what should be done in that
200
Real transaction 'all' was true
202
Statement in cache There was at least one statement in the
205
In transaction We are inside a transaction
207
Stmt modified non-trans The statement being committed modified a
208
non-transactional table
210
All modified non-trans Some statement before this one in the
211
transaction modified a non-transactional
215
============================= = = = = = = = = = = = = = = = =
216
Real transaction N N N N N N N N N N N N N N N N
217
Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y
218
In transaction N N N N Y Y Y Y N N N N Y Y Y Y
219
Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y
220
All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y
222
Action: (C)ommit/(A)ccumulate C C - C A C - C - - - - A A - A
223
============================= = = = = = = = = = = = = = = = =
226
============================= = = = = = = = = = = = = = = = =
227
Real transaction Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y
228
Statement in cache N N N N N N N N Y Y Y Y Y Y Y Y
229
In transaction N N N N Y Y Y Y N N N N Y Y Y Y
230
Stmt modified non-trans N N Y Y N N Y Y N N Y Y N N Y Y
231
All modified non-trans N Y N Y N Y N Y N Y N Y N Y N Y
233
(C)ommit/(A)ccumulate/(-) - - - - C C - C - - - - C C - C
234
============================= = = = = = = = = = = = = = = = =
236
In other words, we commit the transaction if and only if either of
237
the following is true:
238
- We are not in a transaction and committing a statement
240
- We are in a transaction and one (or more) of the following are
243
- A full transaction is committed
247
- A non-transactional statement is committed and there is
250
Otherwise, we accumulate the statement
253
if (all || (!session_test_options(session, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
255
return replicator_end_transaction(session, all, true);
262
This function is called when a transaction involving a transactional
263
table is rolled back.
265
It has the responsibility to flush the transaction cache to the
266
binlog file. However, if the transaction does not involve
267
non-transactional tables, nothing needs to be logged.
269
@param hton The binlog handlerton.
270
@param session The client thread that executes the transaction.
271
@param all This is @c true if this is a real transaction rollback, and
274
@see handlerton::rollback
276
/*
  Rollback callback installed on the binlog handlerton by binlog_init().
  Forwards to replicator_end_transaction() with 'false' as the last
  argument (binlog_commit() passes 'true') and discards its result.
*/
static int binlog_rollback(handlerton *, Session *session, bool all)
280
/* TODO: Fix return type */
281
(void)replicator_end_transaction(session, all, false);
288
How do we handle this (unlikely but legal) case:
290
[transaction] + [update to non-trans table] + [rollback to savepoint] ?
292
The problem occurs when a savepoint is before the update to the
293
non-transactional table. Then when there's a rollback to the savepoint, if we
294
simply truncate the binlog cache, we lose the part of the binlog cache where
295
the update is. If we want to not lose it, we need to write the SAVEPOINT
296
command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
297
is easy: it's just write at the end of the binlog cache, but the former
298
should be *inserted* to the place where the user called SAVEPOINT. The
299
solution is that when the user calls SAVEPOINT, we write it to the binlog
300
cache (so no need to later insert it). As transactions are never intermixed
301
in the binary log (i.e. they are serialized), we won't have conflicts with
302
savepoint names when using mysqlbinlog or in the slave SQL thread.
303
Then when ROLLBACK TO SAVEPOINT is called, if we updated some
304
non-transactional table, we don't truncate the binlog cache but instead write
305
ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
306
will chop the SAVEPOINT command from the binlog cache, which is good as in
307
that case there is no need to have it in the binlog).
310
/*
  Savepoint-set callback installed on the binlog handlerton by
  binlog_init().
  Hands sv, viewed as a my_off_t, to binlog_trans_log_savepos() and then
  passes the session's current query text to replicator_statement().
*/
static int binlog_savepoint_set(handlerton *, Session *session, void *sv)
313
binlog_trans_log_savepos(session, (my_off_t*) sv);
314
/* Write it to the binary log */
316
error= replicator_statement(session, session->query, session->query_length);
321
/*
  Savepoint-rollback callback installed on the binlog handlerton by
  binlog_init().
  Passes the session's current query text to replicator_statement(); the
  handlerton and savepoint pointer parameters are unnamed and unused.
*/
static int binlog_savepoint_rollback(handlerton *, Session *session, void *)
325
error= replicator_statement(session, session->query, session->query_length);
331
/*
  Verify that a log starts with the binlog magic bytes.

  Reads sizeof(magic) bytes from the cache and compares them with
  BINLOG_MAGIC. On a read failure or a mismatch, *errmsg is set to a
  translated description of the problem.

  @param log     IO_CACHE to read from; must be positioned at offset 0.
  @param errmsg  Out: receives the error text on failure.
*/
int check_binlog_magic(IO_CACHE* log, const char** errmsg)
334
/* The magic bytes are at the very start of the file */
assert(my_b_tell(log) == 0);
336
/* A non-zero result from my_b_read() is handled as an I/O error */
if (my_b_read(log, (unsigned char*) magic, sizeof(magic)))
338
*errmsg = _("I/O error reading the header from the binary log");
339
sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
343
if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
345
*errmsg = _("Binlog has bad magic number; It's not a binary log file "
346
"that can be used by this version of Drizzle");
353
File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
357
if ((file = my_open(log_file_name, O_RDONLY,
360
sql_print_error(_("Failed to open log (file '%s', errno %d)"),
361
log_file_name, my_errno);
362
*errmsg = _("Could not open log file");
365
if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
366
MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
368
sql_print_error(_("Failed to create a cache on log (file '%s')"),
370
*errmsg = _("Could not open log file");
373
if (check_binlog_magic(log,errmsg))
380
my_close(file,MYF(0));
388
Find a unique filename for 'filename.#'.
390
Set '#' to a number as low as possible.
393
nonzero if not possible to get unique filename
396
static int find_uniq_filename(char *name)
400
char buff[FN_REFLEN];
401
struct st_my_dir *dir_info;
402
register struct fileinfo *file_info;
404
size_t buf_length, length;
407
length= dirname_part(buff, name, &buf_length);
408
start= name + length;
409
end= strchr(start, '\0');
412
length= (size_t) (end-start+1);
414
if (!(dir_info = my_dir(buff,MYF(MY_DONT_SORT))))
415
{ // This shouldn't happen
416
strcpy(end,".1"); // use name+1
419
file_info= dir_info->dir_entry;
420
for (i=dir_info->number_off_files ; i-- ; file_info++)
422
if (memcmp(file_info->name, start, length) == 0 &&
423
test_if_number(file_info->name+length, &number,0))
425
set_if_bigger(max_found,(ulong) number);
431
sprintf(end,"%06ld",max_found+1);
436
/*
  Record the log type and the IO_CACHE type for this log object.
  DRIZZLE_LOG::open() calls this before opening the file.
*/
void DRIZZLE_LOG::init(enum_log_type log_type_arg,
437
enum cache_type io_cache_type_arg)
439
log_type= log_type_arg;
440
io_cache_type= io_cache_type_arg;
446
Open a (new) log file.
451
log_name The name of the log to open
452
log_type_arg The type of the log. E.g. LOG_NORMAL
453
new_name The new name for the logfile. This is only needed
454
when the method is used to open the binlog file.
455
io_cache_type_arg The type of the IO_CACHE to use for this log file
458
Open the logfile, init IO_CACHE and write startup messages
459
(in case of general and slow query logs).
466
bool DRIZZLE_LOG::open(const char *log_name, enum_log_type log_type_arg,
467
const char *new_name, enum cache_type io_cache_type_arg)
469
char buff[FN_REFLEN];
471
int open_flags= O_CREAT;
475
init(log_type_arg, io_cache_type_arg);
477
if (!(name= strdup(log_name)))
479
name= (char *)log_name; // for the error message
484
strcpy(log_file_name, new_name);
485
else if (generate_new_name(log_file_name, name))
488
if (io_cache_type == SEQ_READ_APPEND)
489
open_flags |= O_RDWR | O_APPEND;
491
open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
495
if ((file= my_open(log_file_name, open_flags,
496
MYF(MY_WME | ME_WAITTANG))) < 0 ||
497
init_io_cache(&log_file, file, IO_SIZE, io_cache_type,
498
lseek(file, 0, SEEK_CUR), 0,
499
MYF(MY_WME | MY_NABP |
500
((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
503
if (log_type == LOG_NORMAL)
505
int len= snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
506
"started with:\nTCP Port: %d, Named Pipe: %s\n",
507
my_progname, server_version, COMPILATION_COMMENT,
509
len+= sprintf(buff+len, "Time Id Command Argument\n");
511
if (my_b_write(&log_file, (unsigned char*) buff, len) ||
512
flush_io_cache(&log_file))
516
log_state= LOG_OPENED;
520
sql_print_error(_("Could not use %s for logging (error %d). "
521
"Turning logging off for the whole duration of the "
522
"Drizzle server process. "
523
"To turn it on again: fix the cause, "
524
"shutdown the Drizzle server and restart it."),
527
my_close(file, MYF(0));
528
end_io_cache(&log_file);
534
log_state= LOG_CLOSED;
538
DRIZZLE_LOG::DRIZZLE_LOG()
539
: name(0), write_error(false), inited(false), log_type(LOG_UNKNOWN),
540
log_state(LOG_CLOSED)
543
We don't want to initialize LOCK_Log here as such initialization depends on
544
safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
545
called only in main(). Doing initialization here would make it happen
548
memset(&log_file, 0, sizeof(log_file));
551
/*
  Initialize LOCK_log, which the constructor deliberately leaves
  uninitialized. The result of pthread_mutex_init() is ignored.
*/
void DRIZZLE_LOG::init_pthread_objects()
555
(void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
563
exiting Bitmask. For the slow and general logs the only used bit is
564
LOG_CLOSE_TO_BE_OPENED. This is used if we intend to call
565
open at once after close.
568
One can do an open on the object at once after doing a close.
569
The internal structures are not freed until cleanup() is called
572
void DRIZZLE_LOG::close(uint32_t exiting)
573
{ // One can't set log_type here!
574
if (log_state == LOG_OPENED)
576
end_io_cache(&log_file);
578
if (my_sync(log_file.file, MYF(MY_WME)) && ! write_error)
581
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
584
if (my_close(log_file.file, MYF(MY_WME)) && ! write_error)
587
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
591
log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
600
/** This is called only once. */
602
void DRIZZLE_LOG::cleanup()
607
(void) pthread_mutex_destroy(&LOCK_log);
614
int DRIZZLE_LOG::generate_new_name(char *new_name, const char *log_name)
616
fn_format(new_name, log_name, drizzle_data_home, "", 4);
617
if (log_type == LOG_BIN)
619
if (!fn_ext(log_name)[0])
621
if (find_uniq_filename(new_name))
623
sql_print_error(ER(ER_NO_UNIQUE_LOGFILE), log_name);
634
The following should be using fn_format(); We just need to
635
first change fn_format() to cut the file name if it's too long.
637
const char *DRIZZLE_LOG::generate_name(const char *log_name,
639
bool strip_ext, char *buff)
641
if (!log_name || !log_name[0])
643
strncpy(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
644
return (const char *)
645
fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
647
// get rid of extension if the log is binary to avoid problems
650
char *p= fn_ext(log_name);
651
uint32_t length= cmin((uint32_t)(p - log_name), FN_REFLEN);
652
strncpy(buff, log_name, length);
654
return (const char*)buff;
661
DRIZZLE_BIN_LOG::DRIZZLE_BIN_LOG()
662
:bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
663
need_start_event(true), m_table_map_version(0),
664
description_event_for_exec(0), description_event_for_queue(0)
667
We don't want to initialize locks here as such initialization depends on
668
safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
669
called only in main(). Doing initialization here would make it happen
672
index_file_name[0] = 0;
673
memset(&index_file, 0, sizeof(index_file));
676
/* this is called only once */
678
void DRIZZLE_BIN_LOG::cleanup()
683
close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
684
delete description_event_for_queue;
685
delete description_event_for_exec;
686
(void) pthread_mutex_destroy(&LOCK_log);
687
(void) pthread_mutex_destroy(&LOCK_index);
688
(void) pthread_cond_destroy(&update_cond);
694
/* Init binlog-specific vars */
695
/*
  Store the binlog-specific settings on this object.

  @param no_auto_events_arg  Saved in no_auto_events; open() writes its
                             start event only when this is false.
  @param max_size_arg        Saved in max_size.
*/
void DRIZZLE_BIN_LOG::init(bool no_auto_events_arg, ulong max_size_arg)
697
no_auto_events= no_auto_events_arg;
698
max_size= max_size_arg;
703
/*
  Initialize the synchronization objects that the constructor leaves
  untouched: the LOCK_log and LOCK_index mutexes and the update_cond
  condition variable. Results of the pthread calls are ignored.
*/
void DRIZZLE_BIN_LOG::init_pthread_objects()
707
(void) pthread_mutex_init(&LOCK_log, MY_MUTEX_INIT_SLOW);
708
(void) pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW);
709
(void) pthread_cond_init(&update_cond, 0);
713
bool DRIZZLE_BIN_LOG::open_index_file(const char *index_file_name_arg,
714
const char *log_name)
716
File index_file_nr= -1;
717
assert(!my_b_inited(&index_file));
720
First open of this class instance
721
Create an index file that will hold all file names used for logging.
722
Add new entries to the end of it.
724
myf opt= MY_UNPACK_FILENAME;
725
if (!index_file_name_arg)
727
index_file_name_arg= log_name; // Use same basename for index file
728
opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
730
fn_format(index_file_name, index_file_name_arg, drizzle_data_home,
732
if ((index_file_nr= my_open(index_file_name,
735
my_sync(index_file_nr, MYF(MY_WME)) ||
736
init_io_cache(&index_file, index_file_nr,
737
IO_SIZE, WRITE_CACHE,
738
lseek(index_file_nr,0,SEEK_END),
739
0, MYF(MY_WME | MY_WAIT_IF_FULL)))
742
TODO: all operations creating/deleting the index file or a log, should
743
call my_sync_dir() or my_sync_dir_by_file() to be durable.
744
TODO: file creation should be done with my_create() not my_open().
746
if (index_file_nr >= 0)
747
my_close(index_file_nr,MYF(0));
755
Open a (new) binlog file.
757
- Open the log file and the index file. Register the new
759
- When calling this while the file is in use, you must hold locks
760
on LOCK_log and LOCK_index.
768
bool DRIZZLE_BIN_LOG::open(const char *log_name,
769
enum_log_type log_type_arg,
770
const char *new_name,
771
enum cache_type io_cache_type_arg,
772
bool no_auto_events_arg,
774
bool null_created_arg)
780
/* open the main log file */
781
if (DRIZZLE_LOG::open(log_name, log_type_arg, new_name, io_cache_type_arg))
782
return(1); /* all warnings issued */
784
init(no_auto_events_arg, max_size_arg);
788
assert(log_type == LOG_BIN);
791
bool write_file_name_to_index_file=0;
793
if (!my_b_filelength(&log_file))
796
The binary log file was empty (probably newly created)
797
This is the normal case and happens when the user doesn't specify
798
an extension for the binary log files.
799
In this case we write a standard header to it.
801
if (my_b_safe_write(&log_file, (unsigned char*) BINLOG_MAGIC,
802
BIN_LOG_HEADER_SIZE))
804
bytes_written+= BIN_LOG_HEADER_SIZE;
805
write_file_name_to_index_file= 1;
808
assert(my_b_inited(&index_file) != 0);
809
reinit_io_cache(&index_file, WRITE_CACHE,
810
my_b_filelength(&index_file), 0, 0);
811
if (need_start_event && !no_auto_events)
814
In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event
815
even if this is not the very first binlog.
817
Format_description_log_event s(BINLOG_VERSION);
819
don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
820
as we won't be able to reset it later
822
if (io_cache_type == WRITE_CACHE)
823
s.flags|= LOG_EVENT_BINLOG_IN_USE_F;
826
s.dont_set_created= null_created_arg;
827
if (s.write(&log_file))
829
bytes_written+= s.data_written;
831
if (description_event_for_queue &&
832
description_event_for_queue->binlog_version>=4)
835
This is a relay log written to by the I/O slave thread.
836
Write the event so that others can later know the format of this relay
838
Note that this event is very close to the original event from the
839
master (it has binlog version of the master, event types of the
840
master), so this is suitable to parse the next relay log's event. It
842
Format_description_log_event::Format_description_log_event(char* buf,).
843
Why don't we want to write the description_event_for_queue if this
844
event is for format<4 (3.23 or 4.x): this is because in that case, the
845
description_event_for_queue describes the data received from the
846
master, but not the data written to the relay log (*conversion*),
847
which is in format 4 (slave's).
850
Set 'created' to 0, so that in next relay logs this event does not
851
trigger cleaning actions on the slave in
852
Format_description_log_event::apply_event_impl().
854
description_event_for_queue->created= 0;
855
/* Don't set log_pos in event header */
856
description_event_for_queue->artificial_event=1;
858
if (description_event_for_queue->write(&log_file))
860
bytes_written+= description_event_for_queue->data_written;
862
if (flush_io_cache(&log_file) ||
863
my_sync(log_file.file, MYF(MY_WME)))
866
if (write_file_name_to_index_file)
869
As this is a new log file, we write the file name to the index
870
file. As with every write to the index file, we sync it afterwards.
872
if (my_b_write(&index_file, (unsigned char*) log_file_name,
873
strlen(log_file_name)) ||
874
my_b_write(&index_file, (unsigned char*) "\n", 1) ||
875
flush_io_cache(&index_file) ||
876
my_sync(index_file.file, MYF(MY_WME)))
880
log_state= LOG_OPENED;
885
sql_print_error(_("Could not use %s for logging (error %d). "
886
"Turning logging off for the whole duration of the "
887
"Drizzle server process. "
888
"To turn it on again: fix the cause, "
889
"shutdown the Drizzle server and restart it."),
892
my_close(file,MYF(0));
893
end_io_cache(&log_file);
894
end_io_cache(&index_file);
900
log_state= LOG_CLOSED;
905
/*
  Locking wrapper around raw_get_current_log(): holds LOCK_log for the
  duration of the call.

  @param linfo  Filled in by raw_get_current_log().
*/
int DRIZZLE_BIN_LOG::get_current_log(LOG_INFO* linfo)
907
pthread_mutex_lock(&LOCK_log);
908
int ret = raw_get_current_log(linfo);
909
pthread_mutex_unlock(&LOCK_log);
913
/*
  Copy the current log file name and the current position of log_file
  into linfo. The visible code takes no lock itself; get_current_log()
  calls it with LOCK_log held.

  NOTE(review): strncpy() with sizeof-1 leaves the name unterminated
  unless the last byte of linfo->log_file_name is already zero - confirm.
*/
int DRIZZLE_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
915
strncpy(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
916
linfo->pos = my_b_tell(&log_file);
921
Move all data up in a filename index file.
923
We do the copy outside of the IO_CACHE as the cache buffers would just
924
make things slower and more complicated.
925
In most cases the copy loop should only do one read.
927
@param index_file File to move
928
@param offset Move everything from here to beginning
931
File will be truncated to be 'offset' shorter or filled up with newlines
937
static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
940
my_off_t init_offset= offset;
941
File file= index_file->file;
942
unsigned char io_buf[IO_SIZE*2];
944
for (;; offset+= bytes_read)
946
(void) lseek(file, offset, SEEK_SET);
947
if ((bytes_read= (int) my_read(file, io_buf, sizeof(io_buf), MYF(MY_WME)))
951
break; // end of file
952
(void) lseek(file, offset-init_offset, SEEK_SET);
953
if (my_write(file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
956
/* The following will either truncate the file or fill the end with '\n' */
957
if (ftruncate(file, offset - init_offset) || my_sync(file, MYF(MY_WME)))
960
/* Reset data in old index cache */
961
reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
969
Find the position in the log-index-file for the given log name.
971
@param linfo Store here the found log file name and position to
972
the NEXT log file name in the index file.
973
@param log_name Filename to find in the index file.
974
Is a null pointer if we want to read the first entry
975
@param need_lock Set this to 1 if the parent doesn't already have a
979
On systems without the truncate function the file will end with one or
980
more empty lines. These will be ignored when reading the file.
985
LOG_INFO_EOF End of log-index-file found
987
LOG_INFO_IO Got IO error while reading file
990
int DRIZZLE_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
994
char *fname= linfo->log_file_name;
995
uint32_t log_name_len= log_name ? (uint) strlen(log_name) : 0;
998
Mutex needed because we need to make sure the file pointer does not
999
move from under our feet
1002
pthread_mutex_lock(&LOCK_index);
1003
safe_mutex_assert_owner(&LOCK_index);
1005
/* As the file is flushed, we can't get an error here */
1006
(void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
1011
my_off_t offset= my_b_tell(&index_file);
1012
/* If we get 0 or 1 characters, this is the end of the file */
1014
if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
1016
/* Did not find the given entry; Return not found or error */
1017
error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
1021
// if the log entry matches, null string matching anything
1023
(log_name_len == length-1 && fname[log_name_len] == '\n' &&
1024
!memcmp(fname, log_name, log_name_len)))
1026
fname[length-1]=0; // remove last \n
1027
linfo->index_file_start_offset= offset;
1028
linfo->index_file_offset = my_b_tell(&index_file);
1034
pthread_mutex_unlock(&LOCK_index);
1040
Find the next log file name in the log-index-file.
1043
linfo Store here the next log file name and position to
1044
the file name after that.
1046
need_lock Set this to 1 if the parent doesn't already have a
1050
- Before calling this function, one has to call find_log_pos()
1052
- Mutex needed because we need to make sure the file pointer does not move
1058
LOG_INFO_EOF End of log-index-file found
1060
LOG_INFO_IO Got IO error while reading file
1063
int DRIZZLE_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
1067
char *fname= linfo->log_file_name;
1070
pthread_mutex_lock(&LOCK_index);
1071
safe_mutex_assert_owner(&LOCK_index);
1073
/* As the file is flushed, we can't get an error here */
1074
(void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
1077
linfo->index_file_start_offset= linfo->index_file_offset;
1078
if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
1080
error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
1083
fname[length-1]=0; // kill \n
1084
linfo->index_file_offset = my_b_tell(&index_file);
1088
pthread_mutex_unlock(&LOCK_index);
1094
Delete all logs referred to in the index file.
1095
Start writing to a new log file.
1097
The new index file will only contain this file.
1099
@param session Thread
1102
If not called from slave thread, write start event to new log
1110
bool DRIZZLE_BIN_LOG::reset_logs(Session* session)
1114
const char* save_name;
1117
We need to get both locks to be sure that no one is trying to
1118
write to the index log file.
1120
pthread_mutex_lock(&LOCK_log);
1121
pthread_mutex_lock(&LOCK_index);
1124
The following mutex is needed to ensure that no threads call
1125
'delete session' as we would then risk missing a 'rollback' from this
1126
thread. If the transaction involved MyISAM tables, it should go
1127
into binlog even on rollback.
1129
pthread_mutex_lock(&LOCK_thread_count);
1131
/* Save variables so that we can reopen the log */
1133
name=0; // Protect against free
1134
close(LOG_CLOSE_TO_BE_OPENED);
1136
/* First delete all old log files */
1138
if (find_log_pos(&linfo, NULL, 0))
1146
if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
1148
if (my_errno == ENOENT)
1150
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
1151
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
1152
linfo.log_file_name);
1153
sql_print_information(_("Failed to delete file '%s'"),
1154
linfo.log_file_name);
1160
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
1161
ER_BINLOG_PURGE_FATAL_ERR,
1162
_("a problem with deleting %s; "
1163
"consider examining correspondence "
1164
"of your binlog index file "
1165
"to the actual binlog files"),
1166
linfo.log_file_name);
1171
if (find_next_log(&linfo, 0))
1175
/* Start logging with a new file */
1176
close(LOG_CLOSE_INDEX);
1177
if ((error= my_delete_allow_opened(index_file_name, MYF(0)))) // Reset (open will update)
1179
if (my_errno == ENOENT)
1181
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
1182
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
1184
sql_print_information(_("Failed to delete file '%s'"),
1191
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
1192
ER_BINLOG_PURGE_FATAL_ERR,
1193
"a problem with deleting %s; "
1194
"consider examining correspondence "
1195
"of your binlog index file "
1196
"to the actual binlog files",
1202
if (!session->slave_thread)
1204
if (!open_index_file(index_file_name, 0))
1205
open(save_name, log_type, 0, io_cache_type, no_auto_events, max_size, 0);
1206
free((unsigned char*) save_name);
1209
pthread_mutex_unlock(&LOCK_thread_count);
1210
pthread_mutex_unlock(&LOCK_index);
1211
pthread_mutex_unlock(&LOCK_log);
1217
Delete relay log files prior to rli->group_relay_log_name
1218
(i.e. all logs which are not involved in a non-finished group
1219
(transaction)), remove them from the index file and start on next
1223
- Protects index file with LOCK_index
1224
- Delete relevant relay log files
1225
- Copy all file names after these ones to the front of the index file
1226
- If the OS has truncate, truncate the file, else fill it with '\n'
1227
- Read the next file name from the index file and store in rli->linfo
1229
@param rli Relay log information
1230
@param included If false, all relay logs that are strictly before
1231
rli->group_relay_log_name are deleted ; if true, the
1232
latter is deleted too (i.e. all relay logs
1233
read by the SQL slave thread are deleted).
1236
- This is only called from the slave-execute thread when it has read
1237
all commands from a relay log and wants to switch to a new relay log.
1238
- When this happens, we can be in an active transaction as
1239
a transaction can span over two relay logs
1240
(although it is always written as a single block to the master's binary
1241
log, hence cannot span over two master's binary logs).
1246
LOG_INFO_EOF End of log-index-file found
1248
LOG_INFO_SEEK Could not allocate IO cache
1250
LOG_INFO_IO Got IO error while reading file
1254
int DRIZZLE_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
1259
assert(rli->slave_running == 1);
1260
assert(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name.c_str()));
1262
pthread_mutex_lock(&LOCK_index);
1263
pthread_mutex_lock(&rli->log_space_lock);
1264
rli->relay_log.purge_logs(rli->group_relay_log_name.c_str(), included,
1265
0, 0, &rli->log_space_total);
1266
// Tell the I/O thread to take the relay_log_space_limit into account
1267
rli->ignore_log_space_limit= 0;
1268
pthread_mutex_unlock(&rli->log_space_lock);
1271
Ok to broadcast after the critical region as there is no risk of
1272
the mutex being destroyed by this thread later - this helps save
1275
pthread_cond_broadcast(&rli->log_space_cond);
1278
Read the next log file name from the index file and pass it back to
1280
If included is true, we want the first relay log;
1281
otherwise we want the one after event_relay_log_name.
1283
if ((included && (error=find_log_pos(&rli->linfo, NULL, 0))) ||
1285
((error=find_log_pos(&rli->linfo, rli->event_relay_log_name.c_str(), 0)) ||
1286
(error=find_next_log(&rli->linfo, 0)))))
1289
sql_print_error(_("next log error: %d offset: %s log: %s included: %d"),
1291
llstr(rli->linfo.index_file_offset,buff),
1292
rli->group_relay_log_name.c_str(),
1298
Reset rli's coordinates to the current log.
1300
rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
1301
rli->event_relay_log_name.assign(rli->linfo.log_file_name);
1304
If we removed the rli->group_relay_log_name file,
1305
we must update the rli->group* coordinates, otherwise do not touch it as the
1306
group's execution is not finished (e.g. COMMIT not executed)
1310
rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
1311
rli->group_relay_log_name.assign(rli->linfo.log_file_name);
1312
rli->notify_group_relay_log_name_update();
1315
/* Store where we are in the new file for the execution thread */
1316
flush_relay_log_info(rli);
1319
pthread_mutex_unlock(&LOCK_index);
1324
Update log index_file.
1327
int DRIZZLE_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
1329
if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
1332
// now update offsets in index file for running threads
1333
if (need_update_threads)
1334
adjust_linfo_offsets(log_info->index_file_start_offset);
1339
Remove all logs before the given log from disk and from the index file.
1341
@param to_log Delete all log file name before this file.
1342
@param included If true, to_log is deleted too.
1344
@param need_update_threads If we want to update the log coordinates of
1345
all threads. False for relay logs, true otherwise.
1346
@param decrease_log_space If not null, decrement this variable by
1347
the amount of log space freed
1350
If any of the logs before the deleted one is in use,
1351
only purge logs up to this one.
1356
LOG_INFO_EOF to_log not found
1357
LOG_INFO_EMFILE too many files opened
1358
LOG_INFO_FATAL if any other than ENOENT error from
1359
stat() or my_delete()
1362
int DRIZZLE_BIN_LOG::purge_logs(const char *to_log,
1365
bool need_update_threads,
1366
uint64_t *decrease_log_space)
1374
pthread_mutex_lock(&LOCK_index);
1375
if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
1379
File name exists in index file; delete until we find this file
1380
or a file that is used.
1382
if ((error=find_log_pos(&log_info, NULL, 0 /*no mutex*/)))
1384
while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
1385
!log_in_use(log_info.log_file_name))
1388
if (stat(log_info.log_file_name, &s))
1390
if (errno == ENOENT)
1393
It's not fatal if we can't stat a log file that does not exist;
1394
If we could not stat, we won't delete.
1396
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
1397
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
1398
log_info.log_file_name);
1399
sql_print_information(_("Failed to execute stat() on file '%s'"),
1400
log_info.log_file_name);
1406
Other than ENOENT are fatal
1408
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
1409
ER_BINLOG_PURGE_FATAL_ERR,
1410
_("a problem with getting info on being purged %s; "
1411
"consider examining correspondence "
1412
"of your binlog index file "
1413
"to the actual binlog files"),
1414
log_info.log_file_name);
1415
error= LOG_INFO_FATAL;
1421
if (!my_delete(log_info.log_file_name, MYF(0)))
1423
if (decrease_log_space)
1424
*decrease_log_space-= s.st_size;
1428
if (my_errno == ENOENT)
1430
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
1431
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
1432
log_info.log_file_name);
1433
sql_print_information(_("Failed to delete file '%s'"),
1434
log_info.log_file_name);
1439
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
1440
ER_BINLOG_PURGE_FATAL_ERR,
1441
_("a problem with deleting %s; "
1442
"consider examining correspondence "
1443
"of your binlog index file "
1444
"to the actual binlog files"),
1445
log_info.log_file_name);
1446
if (my_errno == EMFILE)
1448
error= LOG_INFO_EMFILE;
1450
error= LOG_INFO_FATAL;
1456
if (find_next_log(&log_info, 0) || exit_loop)
1461
If we get killed -9 here, the sysadmin would have to edit
1462
the log index file after restart - otherwise, this should be safe
1464
error= update_log_index(&log_info, need_update_threads);
1471
pthread_mutex_unlock(&LOCK_index);
1476
Remove all logs before the given file date from disk and from the
1479
@param session Thread pointer
1480
@param purge_time Delete all log files before given date.
1483
If any of the logs before the deleted one is in use,
1484
only purge logs up to this one.
1489
LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
1490
LOG_INFO_FATAL if any other than ENOENT error from
1491
stat() or my_delete()
1494
int DRIZZLE_BIN_LOG::purge_logs_before_date(time_t purge_time)
1498
struct stat stat_area;
1500
pthread_mutex_lock(&LOCK_index);
1503
Delete until we find the current file
1504
or a file that is used or a file
1505
that is older than purge_time.
1507
if ((error=find_log_pos(&log_info, NULL, 0 /*no mutex*/)))
1510
while (strcmp(log_file_name, log_info.log_file_name) &&
1511
!log_in_use(log_info.log_file_name))
1513
if (stat(log_info.log_file_name, &stat_area))
1515
if (errno == ENOENT)
1518
It's not fatal if we can't stat a log file that does not exist.
1520
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
1521
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
1522
log_info.log_file_name);
1523
sql_print_information(_("Failed to execute stat() on file '%s'"),
1524
log_info.log_file_name);
1530
Other than ENOENT are fatal
1532
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
1533
ER_BINLOG_PURGE_FATAL_ERR,
1534
_("a problem with getting info on being purged %s; "
1535
"consider examining correspondence "
1536
"of your binlog index file "
1537
"to the actual binlog files"),
1538
log_info.log_file_name);
1539
error= LOG_INFO_FATAL;
1545
if (stat_area.st_mtime >= purge_time)
1547
if (my_delete(log_info.log_file_name, MYF(0)))
1549
if (my_errno == ENOENT)
1551
/* It's not fatal even if we can't delete a log file */
1552
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
1553
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
1554
log_info.log_file_name);
1555
sql_print_information(_("Failed to delete file '%s'"),
1556
log_info.log_file_name);
1561
push_warning_printf(current_session, DRIZZLE_ERROR::WARN_LEVEL_ERROR,
1562
ER_BINLOG_PURGE_FATAL_ERR,
1563
_("a problem with deleting %s; "
1564
"consider examining correspondence "
1565
"of your binlog index file "
1566
"to the actual binlog files"),
1567
log_info.log_file_name);
1568
error= LOG_INFO_FATAL;
1573
if (find_next_log(&log_info, 0))
1578
If we get killed -9 here, the sysadmin would have to edit
1579
the log index file after restart - otherwise, this should be safe
1581
error= update_log_index(&log_info, 1);
1584
pthread_mutex_unlock(&LOCK_index);
1590
Create a new log file name.
1592
@param buf buf of at least FN_REFLEN where new name is stored
1595
If file name will be longer than FN_REFLEN it will be truncated
1598
void DRIZZLE_BIN_LOG::make_log_name(char* buf, const char* log_ident)
1600
uint32_t dir_len = dirname_length(log_file_name);
1601
if (dir_len >= FN_REFLEN)
1602
dir_len=FN_REFLEN-1;
1603
strncpy(buf, log_file_name, dir_len);
1604
strncpy(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
1609
Check if we are writing/reading to the given log file.
1612
bool DRIZZLE_BIN_LOG::is_active(const char *log_file_name_arg)
1614
return !strcmp(log_file_name, log_file_name_arg);
1619
Wrappers around new_file_impl to avoid using argument
1620
to control locking. The argument 1) less readable 2) breaks
1621
encapsulation 3) allows external access to the class without
1622
a lock (which is not possible with private new_file_without_locking
1626
void DRIZZLE_BIN_LOG::new_file()
1632
void DRIZZLE_BIN_LOG::new_file_without_locking()
1639
Start writing to a new log file or reopen the old file.
1641
@param need_lock Set to 1 if caller has not locked LOCK_log
1644
The new file name is stored last in the index file
1647
void DRIZZLE_BIN_LOG::new_file_impl(bool need_lock)
1649
char new_name[FN_REFLEN], *new_name_ptr, *old_name;
1657
pthread_mutex_lock(&LOCK_log);
1658
pthread_mutex_lock(&LOCK_index);
1660
safe_mutex_assert_owner(&LOCK_log);
1661
safe_mutex_assert_owner(&LOCK_index);
1664
if binlog is used as tc log, be sure all xids are "unlogged",
1665
so that on recover we only need to scan one - latest - binlog file
1666
for prepared xids. As this is expected to be a rare event,
1667
simple wait strategy is enough. We're locking LOCK_log to be sure no
1668
new Xid_log_event's are added to the log (and prepared_xids is not
1669
increased), and waiting on COND_prep_xids for late threads to
1674
tc_log_page_waits++;
1675
pthread_mutex_lock(&LOCK_prep_xids);
1676
while (prepared_xids) {
1677
pthread_cond_wait(&COND_prep_xids, &LOCK_prep_xids);
1679
pthread_mutex_unlock(&LOCK_prep_xids);
1682
/* Reuse old name if not binlog and not update log */
1686
If user hasn't specified an extension, generate a new log name
1687
We have to do this here and not in open as we want to store the
1688
new file name in the current binary log file.
1690
if (generate_new_name(new_name, name))
1692
new_name_ptr=new_name;
1694
if (log_type == LOG_BIN)
1696
if (!no_auto_events)
1699
We log the whole file name for log file as the user may decide
1700
to change base names at some point.
1702
Rotate_log_event r(new_name+dirname_length(new_name),
1703
0, LOG_EVENT_OFFSET, 0);
1705
bytes_written += r.data_written;
1708
Update needs to be signalled even if there is no rotate event
1709
log rotation should give the waiting thread a signal to
1710
discover EOF and move on to the next log.
1715
name=0; // Don't free name
1716
close(LOG_CLOSE_TO_BE_OPENED);
1719
Note that at this point, log_state != LOG_CLOSED (important for is_open()).
1723
new_file() is only used for rotation (in FLUSH LOGS or because size >
1724
max_binlog_size or max_relay_log_size).
1725
If this is a binary log, the Format_description_log_event at the beginning of
1726
the new file should have created=0 (to distinguish with the
1727
Format_description_log_event written at server startup, which should
1728
trigger temp tables deletion on slaves.
1731
open(old_name, log_type, new_name_ptr,
1732
io_cache_type, no_auto_events, max_size, 1);
1737
pthread_mutex_unlock(&LOCK_log);
1738
pthread_mutex_unlock(&LOCK_index);
1744
bool DRIZZLE_BIN_LOG::append(Log_event* ev)
1747
pthread_mutex_lock(&LOCK_log);
1749
assert(log_file.type == SEQ_READ_APPEND);
1751
Log_event::write() is smart enough to use my_b_write() or
1752
my_b_append() depending on the kind of cache we have.
1754
if (ev->write(&log_file))
1759
bytes_written+= ev->data_written;
1760
if ((uint) my_b_append_tell(&log_file) > max_size)
1761
new_file_without_locking();
1764
pthread_mutex_unlock(&LOCK_log);
1765
signal_update(); // Safe as we don't call close
1770
bool DRIZZLE_BIN_LOG::appendv(const char* buf, uint32_t len,...)
1776
assert(log_file.type == SEQ_READ_APPEND);
1778
safe_mutex_assert_owner(&LOCK_log);
1781
if (my_b_append(&log_file,(unsigned char*) buf,len))
1786
bytes_written += len;
1787
} while ((buf=va_arg(args,const char*)) && (len=va_arg(args,uint)));
1788
if ((uint) my_b_append_tell(&log_file) > max_size)
1789
new_file_without_locking();
1798
bool DRIZZLE_BIN_LOG::flush_and_sync()
1800
int err=0, fd=log_file.file;
1801
safe_mutex_assert_owner(&LOCK_log);
1802
if (flush_io_cache(&log_file))
1804
if (++sync_binlog_counter >= sync_binlog_period && sync_binlog_period)
1806
sync_binlog_counter= 0;
1807
err=my_sync(fd, MYF(MY_WME));
1812
/**
  Switch the session into "union events" mode.

  Records the first query id that belongs to the union and clears both
  "events were seen" flags before turning do_union on.

  @param session         Session whose binlog_evt_union state is updated
  @param query_id_param  Stored as binlog_evt_union.first_query_id

  @note Asserts that the session is not already in union mode.
*/
void DRIZZLE_BIN_LOG::start_union_events(Session *session, query_id_t query_id_param)
{
  assert(!session->binlog_evt_union.do_union);

  session->binlog_evt_union.first_query_id= query_id_param;
  session->binlog_evt_union.unioned_events_trans= false;
  session->binlog_evt_union.unioned_events= false;
  session->binlog_evt_union.do_union= true;
}
1821
/**
  Leave "union events" mode for the session.

  @param session  Session whose binlog_evt_union.do_union flag is cleared

  @note Asserts that the session is currently in union mode.
*/
void DRIZZLE_BIN_LOG::stop_union_events(Session *session)
{
  /* Stopping is only meaningful after a matching start_union_events(). */
  assert(session->binlog_evt_union.do_union);

  session->binlog_evt_union.do_union= false;
}
1827
/**
  Tell whether a query id falls inside the session's current union.

  @param session         Session whose binlog_evt_union state is inspected
  @param query_id_param  Query id to test

  @return true only if union mode is on and query_id_param is not smaller
          than the recorded first_query_id.
*/
bool DRIZZLE_BIN_LOG::is_query_in_union(Session *session, query_id_t query_id_param)
{
  /* No union in progress: no query can be part of one. */
  if (!session->binlog_evt_union.do_union)
    return false;

  return query_id_param >= session->binlog_evt_union.first_query_id;
}
1834
Moves the last bunch of rows from the pending Rows event to the binlog
1835
(either cached binlog if transaction, or disk binlog). Sets a new pending
1839
DRIZZLE_BIN_LOG::flush_and_set_pending_rows_event(Session *, Rows_log_event*)
1841
assert(drizzle_bin_log.is_open());
1847
Write an event to the binary log.
1850
bool DRIZZLE_BIN_LOG::write(Log_event *event_info)
1852
Session *session= event_info->session;
1855
if (session->binlog_evt_union.do_union)
1858
In Stored function; Remember that function call caused an update.
1859
We will log the function call to the binary log on function exit
1861
session->binlog_evt_union.unioned_events= true;
1862
session->binlog_evt_union.unioned_events_trans |= event_info->cache_stmt;
1867
Flush the pending rows event to the transaction cache or to the
1868
log file. Since this function potentially acquires the LOCK_log
1869
mutex, we do this before acquiring the LOCK_log mutex in this
1872
We only end the statement if we are in a top-level statement. If
1873
we are inside a stored function, we do not end the statement since
1874
this will close all tables on the slave.
1877
pthread_mutex_lock(&LOCK_log);
1880
In most cases this is only called if 'is_open()' is true; in fact this is
1881
mostly called if is_open() *was* true a few instructions before, but it
1882
could have changed since.
1884
if (likely(is_open()))
1886
IO_CACHE *file= &log_file;
1888
In the future we need to add to the following if tests like
1889
"do the involved tables match (to be implemented)
1890
binlog_[wild_]{do|ignore}_table?" (WL#1049)"
1892
if (session && !(session->options & OPTION_BIN_LOG))
1894
pthread_mutex_unlock(&LOCK_log);
1899
Write the SQL command
1902
if (event_info->write(file))
1905
if (file == &log_file) // we are writing to the real log (disk)
1907
if (flush_and_sync())
1910
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
1917
if (my_errno == EFBIG)
1918
my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(0));
1920
my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
1925
if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
1926
++m_table_map_version;
1928
pthread_mutex_unlock(&LOCK_log);
1932
void DRIZZLE_BIN_LOG::rotate_and_purge(uint32_t flags)
1934
if (!(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED))
1935
pthread_mutex_lock(&LOCK_log);
1936
if ((flags & RP_FORCE_ROTATE) ||
1937
(my_b_tell(&log_file) >= (my_off_t) max_size))
1939
new_file_without_locking();
1940
if (expire_logs_days)
1942
time_t purge_time= time(0) - expire_logs_days*24*60*60;
1943
if (purge_time >= 0)
1944
purge_logs_before_date(purge_time);
1947
if (!(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED))
1948
pthread_mutex_unlock(&LOCK_log);
1951
uint32_t DRIZZLE_BIN_LOG::next_file_id()
1954
pthread_mutex_lock(&LOCK_log);
1956
pthread_mutex_unlock(&LOCK_log);
1962
Write the contents of a cache to the binary log.
1966
cache Cache to write to the binary log
1967
lock_log True if the LOCK_log mutex should be acquired, false otherwise
1968
sync_log True if the log should be flushed and sync:ed
1971
Write the contents of the cache to the binary log. The cache will
1972
be reset as a READ_CACHE to be able to read the contents from it.
1975
int DRIZZLE_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
1977
Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
1979
if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
1980
return ER_ERROR_ON_WRITE;
1981
uint32_t length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
1983
unsigned char header[LOG_EVENT_HEADER_LEN];
1986
The events in the buffer have incorrect end_log_pos data
1987
(relative to beginning of group rather than absolute),
1988
so we'll recalculate them in situ so the binlog is always
1989
correct, even in the middle of a group. This is possible
1990
because we now know the start position of the group (the
1991
offset of this cache in the log, if you will); all we need
1992
to do is to find all event-headers, and add the position of
1993
the group to the end_log_pos of each event. This is pretty
1994
straight forward, except that we read the cache in segments,
1995
so an event-header might end up on the cache-border and get
1999
group= (uint)my_b_tell(&log_file);
2006
if we only got a partial header in the last iteration,
2007
get the other half now and process a full header.
2009
if (unlikely(carry > 0))
2011
assert(carry < LOG_EVENT_HEADER_LEN);
2013
/* assemble both halves */
2014
memcpy(&header[carry], cache->read_pos, LOG_EVENT_HEADER_LEN - carry);
2016
/* fix end_log_pos */
2017
val= uint4korr(&header[LOG_POS_OFFSET]) + group;
2018
int4store(&header[LOG_POS_OFFSET], val);
2020
/* write the first half of the split header */
2021
if (my_b_write(&log_file, header, carry))
2022
return ER_ERROR_ON_WRITE;
2025
copy fixed second half of header to cache so the correct
2026
version will be written later.
2028
memcpy(cache->read_pos, &header[carry], LOG_EVENT_HEADER_LEN - carry);
2030
/* next event header at ... */
2031
hdr_offs = uint4korr(&header[EVENT_LEN_OFFSET]) - carry;
2036
/* if there is anything to write, process it. */
2038
if (likely(length > 0))
2041
process all event-headers in this (partial) cache.
2042
if next header is beyond current read-buffer,
2043
we'll get it later (though not necessarily in the
2044
very next iteration, just "eventually").
2047
while (hdr_offs < length)
2050
partial header only? save what we can get, process once
2054
if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
2056
carry= length - hdr_offs;
2057
memcpy(header, cache->read_pos + hdr_offs, carry);
2062
/* we've got a full event-header, and it came in one piece */
2064
unsigned char *log_pos= (unsigned char *)cache->read_pos + hdr_offs + LOG_POS_OFFSET;
2066
/* fix end_log_pos */
2067
val= uint4korr(log_pos) + group;
2068
int4store(log_pos, val);
2070
/* next event header at ... */
2071
log_pos= (unsigned char *)cache->read_pos + hdr_offs + EVENT_LEN_OFFSET;
2072
hdr_offs += uint4korr(log_pos);
2078
Adjust hdr_offs. Note that it may still point beyond the segment
2079
read in the next iteration; if the current event is very long,
2080
it may take a couple of read-iterations (and subsequent adjustments
2081
of hdr_offs) for it to point into the then-current segment.
2082
If we have a split header (!carry), hdr_offs will be set at the
2083
beginning of the next iteration, overwriting the value we set here:
2088
/* Write data to the binary log file */
2089
if (my_b_write(&log_file, cache->read_pos, length))
2090
return ER_ERROR_ON_WRITE;
2091
cache->read_pos=cache->read_end; // Mark buffer used up
2092
} while ((length= my_b_fill(cache)));
2103
Write a cached log entry to the binary log.
2104
- To support transaction over replication, we wrap the transaction
2105
with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
2106
We want to write a BEGIN/ROLLBACK block when a non-transactional table
2107
was updated in a transaction which was rolled back. This is to ensure
2108
that the same updates are run on the slave.
2111
@param cache The cache to copy to the binlog
2112
@param commit_event The commit event to print after writing the
2113
contents of the cache.
2116
We only come here if there is something in the cache.
2118
The thing in the cache is always a complete transaction.
2120
'cache' needs to be reinitialized after this functions returns.
2123
bool DRIZZLE_BIN_LOG::write(Session *session, IO_CACHE *cache, Log_event *commit_event)
2125
pthread_mutex_lock(&LOCK_log);
2127
/* NULL would represent nothing to replicate after ROLLBACK */
2128
assert(commit_event != NULL);
2131
if (likely(is_open())) // Should always be true
2134
We only bother to write to the binary log if there is anything
2137
if (my_b_tell(cache) > 0)
2140
Log "BEGIN" at the beginning of every transaction. Here, a
2141
transaction is either a BEGIN..COMMIT block or a single
2142
statement in autocommit mode.
2144
Query_log_event qinfo(session, STRING_WITH_LEN("BEGIN"), true, false);
2146
Imagine this is rollback due to net timeout, after all
2147
statements of the transaction succeeded. Then we want a
2148
zero-error code in BEGIN. In other words, if there was a
2149
really serious error code it's already in the statement's
2150
events, there is no need to put it also in this internally
2151
generated event, and as this event is generated late it would
2152
lead to false alarms.
2154
This is safer than session->clear_error() against kills at shutdown.
2156
qinfo.error_code= 0;
2158
Now this Query_log_event has artificial log_pos 0. It must be
2159
adjusted to reflect the real position in the log. Not doing it
2160
would confuse the slave: it would prevent this one from
2161
knowing where he is in the master's binlog, which would result
2162
in wrong positions being shown to the user, MASTER_POS_WAIT
2165
if (qinfo.write(&log_file))
2168
if ((write_error= write_cache(cache, false, false)))
2171
if (commit_event && commit_event->write(&log_file))
2173
if (flush_and_sync())
2175
if (cache->error) // Error on read
2177
sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
2178
write_error=1; // Don't give more errors
2185
if commit_event is Xid_log_event, increase the number of
2186
prepared_xids (it's decreased in ::unlog()). Binlog cannot be rotated
2187
if there're prepared xids in it - see the comment in new_file() for
2189
If the commit_event is not Xid_log_event (then it's a Query_log_event)
2190
rotate binlog, if necessary.
2192
if (commit_event && commit_event->get_type_code() == XID_EVENT)
2194
pthread_mutex_lock(&LOCK_prep_xids);
2196
pthread_mutex_unlock(&LOCK_prep_xids);
2199
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
2201
pthread_mutex_unlock(&LOCK_log);
2209
sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
2211
pthread_mutex_unlock(&LOCK_log);
2217
Wait until we get a signal that the relay log has been updated
2219
@param[in] session a Session struct
2221
LOCK_log must be taken before calling this function.
2222
It will be released at the end of the function.
2225
void DRIZZLE_BIN_LOG::wait_for_update_relay_log(Session* session)
{
  /*
    Announce the wait on update_cond/LOCK_log through the session and keep
    whatever enter_cond() returns so it can be handed to exit_cond() below
    (presumably the previous status message - confirm against Session).
  */
  const char *saved_msg=
    session->enter_cond(&update_cond, &LOCK_log,
                        "Slave has read all relay log; "
                        "waiting for the slave I/O "
                        "thread to update it" );

  /* A single wait, no predicate loop: callers re-check the log state. */
  pthread_cond_wait(&update_cond, &LOCK_log);

  session->exit_cond(saved_msg);
}
2239
Wait until we get a signal that the binary log has been updated.
2240
Applies to master only.
2243
@param[in] session a Session struct
2244
@param[in] timeout a pointer to a timespec;
2245
NULL means to wait w/o timeout.
2246
@retval 0 if got signalled on update
2247
@retval non-0 if wait timeout elapsed
2249
LOCK_log must be taken before calling this function.
2250
LOCK_log is being released while the thread is waiting.
2251
LOCK_log is released by the caller.
2254
int DRIZZLE_BIN_LOG::wait_for_update_bin_log(Session* session,
2255
const struct timespec *timeout)
2258
const char* old_msg = session->get_proc_info();
2259
old_msg= session->enter_cond(&update_cond, &LOCK_log,
2260
"Master has sent all binlog to slave; "
2261
"waiting for binlog to be updated");
2263
pthread_cond_wait(&update_cond, &LOCK_log);
2265
ret= pthread_cond_timedwait(&update_cond, &LOCK_log,
2266
const_cast<struct timespec *>(timeout));
2274
@param exiting Bitmask for one or more of the following bits:
2275
- LOG_CLOSE_INDEX : if we should close the index file
2276
- LOG_CLOSE_TO_BE_OPENED : if we intend to call open
2277
at once after close.
2278
- LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
2281
One can do an open on the object at once after doing a close.
2282
The internal structures are not freed until cleanup() is called
2285
void DRIZZLE_BIN_LOG::close(uint32_t exiting)
2286
{ // One can't set log_type here!
2287
if (log_state == LOG_OPENED)
2289
if (log_type == LOG_BIN && !no_auto_events &&
2290
(exiting & LOG_CLOSE_STOP_EVENT))
2294
bytes_written+= s.data_written;
2298
/* don't pwrite in a file opened with O_APPEND - it doesn't work */
2299
if (log_file.type == WRITE_CACHE && log_type == LOG_BIN)
2301
my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
2302
unsigned char flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
2303
assert(pwrite(log_file.file, &flags, 1, offset)==1);
2306
/* this will cleanup IO_CACHE, sync and close the file */
2307
DRIZZLE_LOG::close(exiting);
2311
The following test is needed even if is_open() is not set, as we may have
2312
called a not complete close earlier and the index file is still open.
2315
if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
2317
end_io_cache(&index_file);
2318
if (my_close(index_file.file, MYF(0)) < 0 && ! write_error)
2321
sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name, errno);
2324
log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
2334
void DRIZZLE_BIN_LOG::set_max_size(ulong max_size_arg)
2337
We need to take locks, otherwise this may happen:
2338
new_file() is called, calls open(old_max_size), then before open() starts,
2339
set_max_size() sets max_size to max_size_arg, then open() starts and
2340
uses the old_max_size argument, so max_size_arg has been overwritten and
2341
it's like if the SET command was never run.
2343
pthread_mutex_lock(&LOCK_log);
2345
max_size= max_size_arg;
2346
pthread_mutex_unlock(&LOCK_log);
2352
Check if a string is a valid number.
2354
@param str String to test
2355
@param res Store value here
2356
@param allow_wildcards Set to 1 if we should ignore '%' and '_'
2359
For the moment the allow_wildcards argument is not used
2360
Should be moved to some other file.
2363
1 String is a number
2368
static bool test_if_number(register const char *str,
2369
long *res, bool allow_wildcards)
2376
while (*str++ == ' ') ;
2377
if (*--str == '-' || *str == '+')
2379
while (my_isdigit(files_charset_info,*str) ||
2380
(allow_wildcards && (*str == wild_many || *str == wild_one)))
2388
my_isdigit(files_charset_info,*str) ||
2389
(allow_wildcards && (*str == wild_many || *str == wild_one)) ;
2392
if (*str != 0 || flag == 0)
2396
return(1); /* Number ok */
2397
} /* test_if_number */
2400
void sql_perror(const char *message)
2402
sql_print_error("%s: %s",message, strerror(errno));
2406
bool flush_error_log()
2411
pthread_mutex_lock(&LOCK_error_log);
2412
if (freopen(log_error_file,"a+",stdout)==NULL)
2414
if (freopen(log_error_file,"a+",stderr)==NULL)
2416
pthread_mutex_unlock(&LOCK_error_log);
2421
void DRIZZLE_BIN_LOG::signal_update()
2423
pthread_cond_broadcast(&update_cond);
2427
void sql_print_error(const char *format, ...)
2431
va_start(args, format);
2432
errmsg_vprintf (current_session, ERROR_LEVEL, format, args);
2439
void sql_print_warning(const char *format, ...)
2443
va_start(args, format);
2444
errmsg_vprintf (current_session, WARNING_LEVEL, format, args);
2451
void sql_print_information(const char *format, ...)
2455
va_start(args, format);
2456
errmsg_vprintf (current_session, INFORMATION_LEVEL, format, args);
2463
/********* transaction coordinator log for 2pc - mmap() based solution *******/
2466
the log consists of a file, mmapped to a memory.
2467
file is divided on pages of tc_log_page_size size.
2468
(usable size of the first page is smaller because of log header)
2469
there's PAGE control structure for each page
2470
each page (or rather PAGE control structure) can be in one of three
2471
states - active, syncing, pool.
2472
there could be only one page in active or syncing states,
2473
but many in pool - pool is fifo queue.
2474
usual lifecycle of a page is pool->active->syncing->pool
2475
"active" page - is a page where new xid's are logged.
2476
the page stays active as long as syncing slot is taken.
2477
"syncing" page is being synced to disk. no new xid can be added to it.
2478
when the sync is done the page is moved to a pool and an active page
2481
the result of such an architecture is a natural "commit grouping" -
2482
If commits are coming faster than the system can sync, they do not
2483
stall. Instead, all commit that came since the last sync are
2484
logged to the same page, and they all are synced with the next -
2485
one - sync. Thus, though individual commits are delayed, throughput
2488
when a xid is added to an active page, the thread of this xid waits
2489
for a page's condition until the page is synced. when syncing slot
2490
becomes vacant one of these waiters is awakened to take care of syncing.
2491
it syncs the page and signals all waiters that the page is synced.
2492
PAGE::waiters is used to count these waiters, and a page may never
2493
become active again until waiters==0 (that is all waiters from the
2494
previous sync have noticed the sync was completed)
2496
note, that the page becomes "dirty" and has to be synced only when a
2497
new xid is added into it. Removing a xid from a page does not make it
2498
dirty - we don't sync removals to disk.
2501
uint64_t tc_log_page_waits= 0;
2505
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)
2507
static const char tc_log_magic[]={(char) 254, 0x23, 0x05, 0x74};
2509
uint64_t opt_tc_log_size= TC_LOG_MIN_SIZE;
2510
uint64_t tc_log_max_pages_used= 0;
2511
uint64_t tc_log_page_size= 0;
2512
uint64_t tc_log_cur_pages_used= 0;
2514
int TC_LOG_MMAP::open(const char *opt_name)
2517
bool crashed= false;
2520
assert(total_ha_2pc > 1);
2521
assert(opt_name && opt_name[0]);
2523
tc_log_page_size= getpagesize();
2524
assert(TC_LOG_PAGE_SIZE % tc_log_page_size == 0);
2526
fn_format(logname,opt_name,drizzle_data_home,"",MY_UNPACK_FILENAME);
2527
if ((fd= my_open(logname, O_RDWR, MYF(0))) < 0)
2529
if (my_errno != ENOENT)
2531
if (using_heuristic_recover())
2533
if ((fd= my_create(logname, CREATE_MODE, O_RDWR, MYF(MY_WME))) < 0)
2536
file_length= opt_tc_log_size;
2537
if (ftruncate(fd, file_length))
2544
sql_print_information(_("Recovering after a crash using %s"), opt_name);
2545
if (tc_heuristic_recover)
2547
sql_print_error(_("Cannot perform automatic crash recovery when "
2548
"--tc-heuristic-recover is used"));
2551
file_length= lseek(fd, 0, SEEK_END);
2552
if (file_length == OFF_T_MAX || file_length % tc_log_page_size)
2556
data= (unsigned char *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
2557
MAP_NOSYNC|MAP_SHARED, fd, 0);
2558
if (data == MAP_FAILED)
2565
npages=(uint)file_length/tc_log_page_size;
2566
assert(npages >= 3); // to guarantee non-empty pool
2567
if (!(pages=(PAGE *)malloc(npages*sizeof(PAGE))))
2569
memset(pages, 0, npages*sizeof(PAGE));
2571
for (pg=pages, i=0; i < npages; i++, pg++)
2576
pthread_mutex_init(&pg->lock, MY_MUTEX_INIT_FAST);
2577
pthread_cond_init (&pg->cond, 0);
2578
pg->start=(my_xid *)(data + i*tc_log_page_size);
2580
pg->end=(my_xid *)(pg->start + tc_log_page_size);
2581
pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
2583
pages[0].size=pages[0].free=
2584
(tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
2585
pages[0].start=pages[0].end-pages[0].size;
2586
pages[npages-1].next=0;
2589
if (crashed && recover())
2592
memcpy(data, tc_log_magic, sizeof(tc_log_magic));
2593
data[sizeof(tc_log_magic)]= (unsigned char)total_ha_2pc;
2594
// must cast data to (char *) for solaris. Arg1 is (void *) on linux
2595
// so the cast should be fine.
2596
msync((char *)data, tc_log_page_size, MS_SYNC);
2597
my_sync(fd, MYF(0));
2600
pthread_mutex_init(&LOCK_sync, MY_MUTEX_INIT_FAST);
2601
pthread_mutex_init(&LOCK_active, MY_MUTEX_INIT_FAST);
2602
pthread_mutex_init(&LOCK_pool, MY_MUTEX_INIT_FAST);
2603
pthread_cond_init(&COND_active, 0);
2604
pthread_cond_init(&COND_pool, 0);
2611
pool_last=pages+npages-1;
2621
there is no active page, let's get one from the pool.
2623
Two strategies here:
2624
-# take the first from the pool
2625
-# if there're waiters - take the one with the most free space.
2628
TODO page merging. try to allocate adjacent page first,
2629
so that they can be flushed both in one sync
2632
void TC_LOG_MMAP::get_active_from_pool()
2634
PAGE **p, **best_p=0;
2638
pthread_mutex_lock(&LOCK_pool);
2643
if ((*p)->waiters == 0) // can the first page be used ?
2644
break; // yes - take it.
2646
best_free=0; // no - trying second strategy
2647
for (p=&(*p)->next; *p; p=&(*p)->next)
2649
if ((*p)->waiters == 0 && (*p)->free > best_free)
2651
best_free=(*p)->free;
2656
while ((*best_p == 0 || best_free == 0) && overflow());
2659
if (active->free == active->size) // we've chosen an empty page
2661
tc_log_cur_pages_used++;
2662
set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
2665
if ((*best_p)->next) // unlink the page from the pool
2666
*best_p=(*best_p)->next;
2671
pthread_mutex_unlock(&LOCK_pool);
2676
perhaps, increase log size ?
2678
/*
  Handle the "no usable page in the pool" situation by waiting.

  Increments the tc_log_page_waits counter and blocks on COND_pool.
  LOCK_pool is the mutex handed to pthread_cond_wait(), so the caller
  must already hold LOCK_pool when calling this function.

  @return always 1
*/
int TC_LOG_MMAP::overflow()
2681
simple overflow handling - just wait
2682
TODO perhaps, increase log size ?
2683
let's check the behaviour of tc_log_page_waits first
2685
tc_log_page_waits++;
2686
pthread_cond_wait(&COND_pool, &LOCK_pool);
2687
return 1; // always return 1
2691
Record that transaction XID is committed on the persistent storage.
2693
This function is called in the middle of two-phase commit:
2694
First all resources prepare the transaction, then tc_log->log() is called,
2695
then all resources commit the transaction, then tc_log->unlog() is called.
2697
All access to active page is serialized but it's not a problem, as
2698
we're assuming that fsync() will be a main bottleneck.
2699
That is, parallelizing writes to log pages we'll decrease number of
2700
threads waiting for a page, but then all these threads will be waiting
2701
for a fsync() anyway
2703
If tc_log == DRIZZLE_LOG then tc_log writes transaction to binlog and
2704
records XID in a special Xid_log_event.
2705
If tc_log == TC_LOG_MMAP then the xid is written in a special memory-mapped
2711
\# - otherwise, "cookie", a number that will be passed as an argument
2712
to unlog() call. tc_log can define it any way it wants,
2713
and use for whatever purposes. TC_LOG_MMAP sets it
2714
to the position in memory where xid was logged to.
2717
/*
  Record `xid` in the active page of the memory-mapped log.

  Visible flow: take LOCK_active, wait on COND_active while the active
  page has no free slots, obtain an active page from the pool, lock the
  page, compute the cookie, then decide under LOCK_sync whether to wait
  for a sync in progress or to claim the `syncing` slot for this page.

  @param xid  transaction id to store

  @return 0 if the page ended up in the ERROR state, otherwise the cookie:
          the byte offset, relative to `data`, of the slot that was used
*/
int TC_LOG_MMAP::log_xid(Session *, my_xid xid)
2723
pthread_mutex_lock(&LOCK_active);
2726
if active page is full - just wait...
2727
frankly speaking, active->free here accessed outside of mutex
2728
protection, but it's safe, because it only means we may miss an
2729
unlog() for the active page, and we're not waiting for it here -
2730
unlog() does not signal COND_active.
2732
while (unlikely(active && active->free == 0))
2733
pthread_cond_wait(&COND_active, &LOCK_active);
2735
/* no active page ? take one from the pool */
2737
get_active_from_pool();
2740
pthread_mutex_lock(&p->lock);
2742
/* searching for an empty slot */
2746
assert(p->ptr < p->end); // because p->free > 0
2749
/* found! store xid there and mark the page dirty */
2750
cookie= (ulong)((unsigned char *)p->ptr - data); // can never be zero
2755
/* to sync or not to sync - this is the question */
2756
// lock hand-over: drop LOCK_active, take LOCK_sync, then drop the page lock
2756
pthread_mutex_unlock(&LOCK_active);
2757
pthread_mutex_lock(&LOCK_sync);
2758
pthread_mutex_unlock(&p->lock);
2761
{ // somebody's syncing. let's wait
2764
note - it must be while (), not do ... while () here
2765
as p->state may be not DIRTY when we come here
2767
while (p->state == DIRTY && syncing)
2768
pthread_cond_wait(&p->cond, &LOCK_sync);
2770
// remember whether the page was left in the ERROR state
2770
err= p->state == ERROR;
2771
if (p->state != DIRTY) // page was synced
2773
if (p->waiters == 0)
2774
pthread_cond_signal(&COND_pool); // in case somebody's waiting
2775
pthread_mutex_unlock(&LOCK_sync);
2776
goto done; // we're done
2778
} // page was not synced! do it now
2779
assert(active == p && syncing == 0);
2780
// claim the syncing slot for this page and retire it as the active page
2780
pthread_mutex_lock(&LOCK_active);
2781
syncing=p; // place is vacant - take it
2782
active=0; // page is not active anymore
2783
pthread_cond_broadcast(&COND_active); // in case somebody's waiting
2784
pthread_mutex_unlock(&LOCK_active);
2785
pthread_mutex_unlock(&LOCK_sync);
2789
return err ? 0 : cookie;
2792
/*
  Flush the page referenced by `syncing` and hand it back to the pool.

  Visible flow: msync() on the page start, my_sync() on the log file
  descriptor, then under LOCK_pool the page is linked after pool_last,
  its state is set to ERROR or POOL depending on `err`, and waiters on
  the page condition and on COND_pool are woken. Finally, under LOCK_sync,
  the condition of the current active page is signalled.
*/
int TC_LOG_MMAP::sync()
2796
assert(syncing != active);
2799
sit down and relax - this can take a while...
2800
note - no locks are held at this point
2802
// must cast data to (char *) for solaris. Arg1 is (void *) on linux
2803
// so the cast should be fine.
2804
// NOTE(review): the length argument is 1, not tc_log_page_size; presumably
2804
// this relies on msync() flushing the whole containing page - confirm.
2804
err= msync((char *)syncing->start, 1, MS_SYNC);
2806
err= my_sync(fd, MYF(0));
2808
/* page is synced. let's move it to the pool */
2809
pthread_mutex_lock(&LOCK_pool);
2810
pool_last->next=syncing;
2813
syncing->state= err ? ERROR : POOL;
2814
pthread_cond_broadcast(&syncing->cond); // signal "sync done"
2815
pthread_cond_signal(&COND_pool); // in case somebody's waiting
2816
pthread_mutex_unlock(&LOCK_pool);
2818
/* marking 'syncing' slot free */
2819
pthread_mutex_lock(&LOCK_sync);
2821
// NOTE(review): `active` is dereferenced without a null check here, while
2821
// log_xid() sets active=0 before a sync - confirm it cannot be null.
2821
pthread_cond_signal(&active->cond); // wake up a new syncer
2822
pthread_mutex_unlock(&LOCK_sync);
2827
erase xid from the page, update page free space counters/pointers.
2828
cookie points directly to the memory where xid was logged.
2831
/*
  Release the slot identified by `cookie`.

  @param cookie  byte offset of the slot relative to `data`; the owning
                 page is pages[cookie / tc_log_page_size]
  @param xid     transaction id associated with the slot
*/
void TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
2833
PAGE *p=pages+(cookie/tc_log_page_size);
2834
my_xid *x=(my_xid *)(data+cookie);
2837
// the slot must lie inside the page derived from the cookie
2837
assert(x >= p->start && x < p->end);
2840
pthread_mutex_lock(&p->lock);
2842
assert(p->free <= p->size);
2843
// move the page's search pointer back if this slot precedes it
2843
set_if_smaller(p->ptr, x);
2844
if (p->free == p->size) // the page is completely empty
2845
statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
2846
if (p->waiters == 0) // the page is in pool and ready to rock
2847
pthread_cond_signal(&COND_pool); // ping ... for overflow()
2848
pthread_mutex_unlock(&p->lock);
2851
/*
  Tear down the memory-mapped log.

  Visible steps: destroy LOCK_sync, LOCK_active, LOCK_pool and COND_pool;
  overwrite the first byte of the mapping; destroy the per-page mutexes
  and condition variables; free the page array; unmap `data`; close `fd`;
  and delete the log file when `inited` is at least 5.

  NOTE(review): no pthread_cond_destroy() for COND_active is visible here -
  confirm whether it is destroyed elsewhere.
*/
void TC_LOG_MMAP::close()
2856
pthread_mutex_destroy(&LOCK_sync);
2857
pthread_mutex_destroy(&LOCK_active);
2858
pthread_mutex_destroy(&LOCK_pool);
2859
pthread_cond_destroy(&COND_pool);
2861
data[0]='A'; // garble the first (signature) byte, in case my_delete fails
2863
for (i=0; i < npages; i++)
2865
if (pages[i].ptr == 0)
2867
pthread_mutex_destroy(&pages[i].lock);
2868
pthread_cond_destroy(&pages[i].cond);
2871
free((unsigned char*)pages);
2873
my_munmap((char*)data, (size_t)file_length);
2875
my_close(fd, MYF(0));
2877
if (inited>=5) // cannot do in the switch because of Windows
2878
my_delete(logname, MYF(MY_WME));
2882
/*
  Recover from the contents of the memory-mapped log.

  Visible steps: verify the magic header at the start of `data`; verify
  that the byte following the magic equals total_ha_2pc; initialise the
  `xids` hash; insert every non-zero xid found in every page; call
  ha_recover() with the hash; zero the whole mapping. An error message is
  printed for a bad header, an engine-count mismatch, or a failed recovery.
*/
int TC_LOG_MMAP::recover()
2885
PAGE *p=pages, *end_p=pages+npages;
2887
if (memcmp(data, tc_log_magic, sizeof(tc_log_magic)))
2889
sql_print_error(_("Bad magic header in tc log"));
2894
the first byte after magic signature is set to current
2895
number of storage engines on startup
2897
if (data[sizeof(tc_log_magic)] != total_ha_2pc)
2899
sql_print_error(_("Recovery failed! You must enable "
2900
"exactly %d storage engines that support "
2901
"two-phase commit protocol"),
2902
data[sizeof(tc_log_magic)]);
2906
if (hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
2907
sizeof(my_xid), 0, 0, MYF(0)))
2910
// collect every non-zero xid slot from every page
2910
for ( ; p < end_p ; p++)
2912
for (my_xid *x=p->start; x < p->end; x++)
2913
if (*x && my_hash_insert(&xids, (unsigned char *)x))
2917
if (ha_recover(&xids))
2921
// wipe the entire mapped log
2921
memset(data, 0, (size_t)file_length);
2927
sql_print_error(_("Crash recovery failed. Either correct the problem "
2928
"(if it's, for example, out of memory error) and restart, "
2929
"or delete tc log and start drizzled with "
2930
"--tc-heuristic-recover={commit|rollback}"));
2936
/* File-scope instances of the TC_LOG_DUMMY and TC_LOG_MMAP implementations. */
TC_LOG_DUMMY tc_log_dummy;
2937
TC_LOG_MMAP tc_log_mmap;
2940
Perform heuristic recovery, if --tc-heuristic-recover was used.
2943
no matter whether heuristic recovery was successful or not
2944
the server must exit, so the return value is the same in both cases.
2947
0 no heuristic recovery was requested
2949
1 heuristic recovery was performed
2952
/*
  Report on heuristic crash recovery, which is controlled by the
  tc_heuristic_recover setting.

  When the setting is non-zero the visible code logs that heuristic mode
  is in effect, may log a failure, and asks the operator to restart
  without the option.

  NOTE(review): the restart message names "mysqld" while other messages in
  this file name "drizzled" - confirm which is intended.
*/
int TC_LOG::using_heuristic_recover()
2954
if (!tc_heuristic_recover)
2957
sql_print_information(_("Heuristic crash recovery mode"));
2959
sql_print_error(_("Heuristic crash recovery failed"));
2960
sql_print_information(_("Please restart mysqld without --tc-heuristic-recover"));
2964
/****** transaction coordinator log for 2pc - binlog() based solution ******/
2965
#define TC_LOG_BINLOG DRIZZLE_BIN_LOG
2969
keep in-memory list of prepared transactions
2970
(add to list in log(), remove on unlog())
2971
and copy it to the new binlog if rotated
2972
but let's check the behaviour of tc_log_page_waits first!
2975
/*
  Open the binlog-based transaction coordinator log.

  Visible steps: initialise LOCK_prep_xids / COND_prep_xids; check that the
  index file cache is initialised; consult using_heuristic_recover() (a new
  binlog is opened in that path); locate the first log with find_log_pos()
  and step through the index with find_next_log(), remembering the current
  log file name; open that binlog; and, if its first event is a format
  description event carrying LOG_EVENT_BINLOG_IN_USE_F, run recover() on it.

  @param opt_name  non-empty log name (asserted); also used in the
                   "Recovering after a crash" message
*/
int TC_LOG_BINLOG::open(const char *opt_name)
2980
assert(total_ha_2pc > 1);
2981
assert(opt_name && opt_name[0]);
2983
pthread_mutex_init(&LOCK_prep_xids, MY_MUTEX_INIT_FAST);
2984
pthread_cond_init (&COND_prep_xids, 0);
2986
if (!my_b_inited(&index_file))
2988
/* There was a failure to open the index file, can't open the binlog */
2993
if (using_heuristic_recover())
2995
/* generate a new binlog to mask a corrupted one */
2996
open(opt_name, LOG_BIN, 0, WRITE_CACHE, 0, max_binlog_size, 0);
3001
if ((error= find_log_pos(&log_info, NULL, 1)))
3003
if (error != LOG_INFO_EOF)
3004
sql_print_error(_("find_log_pos() failed (error: %d)"), error);
3015
Format_description_log_event fdle(BINLOG_VERSION);
3016
char log_name[FN_REFLEN];
3018
if (! fdle.is_valid())
3023
// NOTE(review): strncpy() with sizeof-1 does not write a terminating NUL
3023
// when the source fills the buffer - confirm log_name is terminated.
3023
strncpy(log_name, log_info.log_file_name, sizeof(log_name)-1);
3024
} while (!(error= find_next_log(&log_info, 1)));
3026
if (error != LOG_INFO_EOF)
3028
// NOTE(review): the failing call here is find_next_log(), but the message
3028
// names find_log_pos() - confirm the intended wording.
3028
sql_print_error(_("find_log_pos() failed (error: %d)"), error);
3032
if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
3034
sql_print_error("%s", errmsg);
3038
// recovery is needed only when the log is still flagged as in use
3038
if ((ev= Log_event::read_log_event(&log, 0, &fdle)) &&
3039
ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
3040
ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
3042
sql_print_information(_("Recovering after a crash using %s"), opt_name);
3043
error= recover(&log, (Format_description_log_event *)ev);
3050
my_close(file, MYF(MY_WME));
3060
/** This is called on shutdown, after ha_panic. */
3061
/*
  Destroy the mutex and condition variable guarding prepared_xids.
  Asserts that prepared_xids is zero when called.
*/
void TC_LOG_BINLOG::close()
3063
assert(prepared_xids==0);
3064
pthread_mutex_destroy(&LOCK_prep_xids);
3065
pthread_cond_destroy (&COND_prep_xids);
3077
/*
  Log `xid` for `session` by ending the transaction through the replicator.

  @param session  session whose transaction is being committed
  @param xid      transaction id; used to construct an Xid_log_event

  @return the logical negation of replicator_end_transaction()'s result:
          1 when that call returns zero/false, 0 otherwise
*/
int TC_LOG_BINLOG::log_xid(Session *session, my_xid xid)
3079
Xid_log_event xle(session, xid);
3080
/* TODO: Fix return type */
3082
We always commit the entire transaction when writing an XID. Also
3083
note that the return value is inverted.
3085
TODO: fix backasswards logic on this method
3088
return replicator_end_transaction(session, true, true) ? false : true;
3091
/*
  Account for one finished prepared transaction.

  Decrements prepared_xids under LOCK_prep_xids and signals COND_prep_xids
  when the counter reaches zero, then calls rotate_and_purge(0). Both
  parameters are unnamed and therefore unused.
*/
void TC_LOG_BINLOG::unlog(ulong, my_xid)
3093
pthread_mutex_lock(&LOCK_prep_xids);
3094
assert(prepared_xids > 0);
3095
if (--prepared_xids == 0) {
3096
pthread_cond_signal(&COND_prep_xids);
3098
pthread_mutex_unlock(&LOCK_prep_xids);
3099
rotate_and_purge(0); // as ::write() did not rotate
3102
/*
  Recover prepared transactions using the XID events of a binlog.

  Visible steps: validate `fdle` and initialise the `xids` hash; set up a
  memory root; clear LOG_EVENT_BINLOG_IN_USE_F on `fdle`; read events from
  `log` while they are valid, copying the xid of every XID_EVENT into the
  memory root and inserting it into the hash; call ha_recover() with the
  hash; release the memory root. An error message is printed on failure.

  @param log   IO cache to read events from
  @param fdle  format description event used when reading events
*/
int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
3108
if (! fdle->is_valid() ||
3109
hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
3110
sizeof(my_xid), 0, 0, MYF(0)))
3113
init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE);
3115
fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
3117
while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid())
3119
if (ev->get_type_code() == XID_EVENT)
3121
Xid_log_event *xev=(Xid_log_event *)ev;
3122
// copy the xid into mem_root so the hash entry has its own storage
3122
unsigned char *x= (unsigned char *) memdup_root(&mem_root, (unsigned char*) &xev->xid,
3126
// NOTE(review): no check of `x` is visible before the insert - confirm.
3126
my_hash_insert(&xids, x);
3131
if (ha_recover(&xids))
3134
free_root(&mem_root, MYF(0));
3139
free_root(&mem_root, MYF(0));
3142
sql_print_error(_("Crash recovery failed. Either correct the problem "
3143
"(if it's, for example, out of memory error) and restart, "
3144
"or delete (or rename) binary log and start mysqld with "
3145
"--tc-heuristic-recover={commit|rollback}"));
3150
/*
  @return true when the table share's table_map_version equals this log's
          current table_map_version()
*/
bool DRIZZLE_BIN_LOG::is_table_mapped(Table *table) const
3152
return table->s->table_map_version == table_map_version();
3156
Get the file name of the Drizzle binlog.
3157
@return the name of the binlog file
3160
/* Thin accessor: returns get_log_fname() of the global drizzle_bin_log. */
const char* drizzle_bin_log_file_name(void)
3162
return drizzle_bin_log.get_log_fname();
3167
Get the current position of the Drizzle binlog.
3168
@return byte offset from the beginning of the binlog
3171
/*
  Returns the pos_in_file field of the global drizzle_bin_log's log file,
  cast to uint64_t.
*/
uint64_t drizzle_bin_log_file_pos(void)
3173
return (uint64_t) drizzle_bin_log.get_log_file()->pos_in_file;
3177
/*
  Plugin descriptor for the binlog, declared as a storage-engine plugin.
  binlog_init is the only hook supplied; the deinit hook, status variables,
  system variables and config options are all NULL.
*/
mysql_declare_plugin(binlog)
3179
DRIZZLE_STORAGE_ENGINE_PLUGIN,
3183
"This is a pseudo storage engine to represent the binlog in a transaction",
3185
binlog_init, /* Plugin Init */
3186
NULL, /* Plugin Deinit */
3187
NULL, /* status variables */
3188
NULL, /* system variables */
3189
NULL /* config options */
3191
mysql_declare_plugin_end;