1
/* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2
* vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
4
* Copyright (C) 2008 Sun Microsystems
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; version 2 of the License.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
#include <drizzled/server_includes.h>
29
#include <google/protobuf/io/zero_copy_stream.h>
30
#include <google/protobuf/io/zero_copy_stream_impl.h>
32
#include "mysys/my_dir.h"
33
#include "mysys/hash.h"
34
#include "mysys/cached_directory.h"
36
#include <drizzled/definitions.h>
37
#include <drizzled/base.h>
38
#include <drizzled/cursor.h>
39
#include <drizzled/plugin/storage_engine.h>
40
#include <drizzled/session.h>
41
#include <drizzled/error.h>
42
#include <drizzled/gettext.h>
43
#include <drizzled/name_map.h>
44
#include <drizzled/unireg.h>
45
#include <drizzled/data_home.h>
46
#include "drizzled/errmsg_print.h"
47
#include "drizzled/name_map.h"
48
#include "drizzled/xid.h"
50
#include <drizzled/table_proto.h>
57
NameMap<plugin::StorageEngine *> all_engines;
58
static std::vector<plugin::StorageEngine *> vector_of_engines;
59
static std::vector<plugin::StorageEngine *> vector_of_transactional_engines;
60
static std::set<std::string> set_of_table_definition_ext;
62
plugin::StorageEngine::StorageEngine(const string name_arg,
63
const bitset<HTON_BIT_SIZE> &flags_arg,
64
size_t savepoint_offset_arg,
66
: Plugin(name_arg, "StorageEngine"),
67
two_phase_commit(support_2pc),
70
savepoint_offset(savepoint_alloc_size),
71
orig_savepoint_offset(savepoint_offset_arg),
76
savepoint_alloc_size+= orig_savepoint_offset;
81
pthread_mutex_init(&proto_cache_mutex, NULL);
85
plugin::StorageEngine::~StorageEngine()
87
savepoint_alloc_size-= orig_savepoint_offset;
88
pthread_mutex_destroy(&proto_cache_mutex);
91
void plugin::StorageEngine::setTransactionReadWrite(Session& session)
93
Ha_trx_info *ha_info= &session.ha_data[getSlot()].ha_info[0];
95
When a storage engine method is called, the transaction must
96
have been started, unless it's a DDL call, for which the
97
storage engine starts the transaction internally, and commits
98
it internally, without registering in the ha_list.
99
Unfortunately here we can't know for sure if the engine
100
has registered the transaction or not, so we must check.
102
if (ha_info->is_started())
105
* table_share can be NULL in plugin::StorageEngine::dropTable().
107
ha_info->set_trx_read_write();
113
int plugin::StorageEngine::doRenameTable(Session *,
118
for (const char **ext= bas_ext(); *ext ; ext++)
120
if (rename_file_ext(from, to, *ext))
122
if ((error=my_errno) != ENOENT)
132
Delete all files with extension from bas_ext().
134
@param name Base name of table
137
We assume that the Cursor may return more extensions than
138
was actually used for the file.
141
0 If we successfully deleted at least one file from bas_ext() and
142
didn't get any other errors than ENOENT
146
int plugin::StorageEngine::doDropTable(Session&,
147
const string table_path)
150
int enoent_or_zero= ENOENT; // Error if no file was deleted
151
char buff[FN_REFLEN];
153
for (const char **ext= bas_ext(); *ext ; ext++)
155
fn_format(buff, table_path.c_str(), "", *ext,
156
MY_UNPACK_FILENAME|MY_APPEND_EXT);
157
if (my_delete_with_symlink(buff, MYF(0)))
159
if ((error= my_errno) != ENOENT)
163
enoent_or_zero= 0; // No error for ENOENT
164
error= enoent_or_zero;
169
const char *plugin::StorageEngine::checkLowercaseNames(const char *path,
172
if (flags.test(HTON_BIT_FILE_BASED))
175
/* Ensure that the table Cursor gets the path in lower case */
176
if (tmp_path != path)
177
strcpy(tmp_path, path);
180
we only should turn into lowercase database/table part
181
so start the process after homedirectory
183
if (strstr(tmp_path, drizzle_tmpdir) == tmp_path)
184
my_casedn_str(files_charset_info, tmp_path + strlen(drizzle_tmpdir));
186
my_casedn_str(files_charset_info, tmp_path + drizzle_data_home_len);
192
bool plugin::StorageEngine::addPlugin(plugin::StorageEngine *engine)
194
if (all_engines.add(engine))
196
errmsg_printf(ERRMSG_LVL_ERROR,
197
_("Couldn't add StorageEngine"));
201
vector_of_engines.push_back(engine);
203
if (engine->check_flag(HTON_BIT_DOES_TRANSACTIONS))
204
vector_of_transactional_engines.push_back(engine);
206
if (engine->getTableDefinitionFileExtension().length())
208
assert(engine->getTableDefinitionFileExtension().length() == DEFAULT_DEFINITION_FILE_EXT.length());
209
set_of_table_definition_ext.insert(engine->getTableDefinitionFileExtension());
215
void plugin::StorageEngine::removePlugin(plugin::StorageEngine *engine)
217
all_engines.remove(engine);
218
vector_of_engines.clear();
219
vector_of_transactional_engines.clear();
222
plugin::StorageEngine *plugin::StorageEngine::findByName(string find_str)
224
transform(find_str.begin(), find_str.end(),
225
find_str.begin(), ::tolower);
227
plugin::StorageEngine *engine= all_engines.find(find_str);
229
if (engine && engine->is_user_selectable())
235
plugin::StorageEngine *plugin::StorageEngine::findByName(Session& session,
239
transform(find_str.begin(), find_str.end(),
240
find_str.begin(), ::tolower);
242
if (find_str.compare("default") == 0)
243
return session.getDefaultStorageEngine();
245
plugin::StorageEngine *engine= all_engines.find(find_str);
247
if (engine && engine->is_user_selectable())
253
class StorageEngineCloseConnection
254
: public unary_function<plugin::StorageEngine *, void>
258
StorageEngineCloseConnection(Session *session_arg) : session(session_arg) {}
260
there's no need to rollback here as all transactions must
261
be rolled back already
263
inline result_type operator() (argument_type engine)
265
if (engine->is_enabled() &&
266
session_get_ha_data(session, engine))
267
engine->close_connection(session);
273
don't bother to rollback here, it's done already
275
void plugin::StorageEngine::closeConnection(Session* session)
277
for_each(vector_of_engines.begin(), vector_of_engines.end(),
278
StorageEngineCloseConnection(session));
281
void plugin::StorageEngine::dropDatabase(char* path)
283
for_each(vector_of_engines.begin(), vector_of_engines.end(),
284
bind2nd(mem_fun(&plugin::StorageEngine::drop_database),path));
287
int plugin::StorageEngine::commitOrRollbackByXID(XID *xid, bool commit)
292
transform(all_engines.begin(), all_engines.end(), results.begin(),
293
bind2nd(mem_fun(&plugin::StorageEngine::commit_by_xid),xid));
295
transform(all_engines.begin(), all_engines.end(), results.begin(),
296
bind2nd(mem_fun(&plugin::StorageEngine::rollback_by_xid),xid));
298
if (find_if(results.begin(), results.end(), bind2nd(equal_to<int>(),0))
306
This function should be called when MySQL sends rows of a SELECT result set
307
or the EOF mark to the client. It releases a possible adaptive hash index
308
S-latch held by session in InnoDB and also releases a possible InnoDB query
309
FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a session to
310
keep them over several calls of the InnoDB Cursor interface when a join
311
is executed. But when we let control pass to the client they have
312
to be released because if the application program uses mysql_use_result(),
313
it may deadlock on the S-latch if the application on another connection
314
performs another SQL query. In MySQL-4.1 this is even more important because
315
there a connection can have several SELECT queries open at the same time.
317
@param session the thread handle of the current connection
322
int plugin::StorageEngine::releaseTemporaryLatches(Session *session)
324
for_each(vector_of_transactional_engines.begin(), vector_of_transactional_engines.end(),
325
bind2nd(mem_fun(&plugin::StorageEngine::release_temporary_latches),session));
329
bool plugin::StorageEngine::flushLogs(plugin::StorageEngine *engine)
333
if (find_if(all_engines.begin(), all_engines.end(),
334
mem_fun(&plugin::StorageEngine::flush_logs))
335
!= all_engines.begin())
340
if ((!engine->is_enabled()) ||
341
(engine->flush_logs()))
348
recover() step of xa.
351
there are three modes of operation:
352
- automatic recover after a crash
353
in this case commit_list != 0, tc_heuristic_recover==0
354
all xids from commit_list are committed, others are rolled back
355
- manual (heuristic) recover
356
in this case commit_list==0, tc_heuristic_recover != 0
357
DBA has explicitly specified that all prepared transactions should
358
be committed (or rolled back).
359
- no recovery (MySQL did not detect a crash)
360
in this case commit_list==0, tc_heuristic_recover == 0
361
there should be no prepared transactions in this case.
363
class XARecover : unary_function<plugin::StorageEngine *, void>
365
int trans_len, found_foreign_xids, found_my_xids;
371
XARecover(XID *trans_list_arg, int trans_len_arg,
372
HASH *commit_list_arg, bool dry_run_arg)
373
: trans_len(trans_len_arg), found_foreign_xids(0), found_my_xids(0),
375
trans_list(trans_list_arg), commit_list(commit_list_arg),
381
return found_foreign_xids;
386
return found_my_xids;
389
result_type operator() (argument_type engine)
394
if (engine->is_enabled())
396
while ((got= engine->recover(trans_list, trans_len)) > 0 )
398
errmsg_printf(ERRMSG_LVL_INFO,
399
_("Found %d prepared transaction(s) in %s"),
400
got, engine->getName().c_str());
401
for (int i=0; i < got; i ++)
403
my_xid x=trans_list[i].get_my_xid();
404
if (!x) // not "mine" - that is generated by external TM
406
xid_cache_insert(trans_list+i, XA_PREPARED);
407
found_foreign_xids++;
417
hash_search(commit_list, (unsigned char *)&x, sizeof(x)) != 0 :
418
tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
420
engine->commit_by_xid(trans_list+i);
424
engine->rollback_by_xid(trans_list+i);
434
int plugin::StorageEngine::recover(HASH *commit_list)
436
XID *trans_list= NULL;
439
bool dry_run= (commit_list==0 && tc_heuristic_recover==0);
441
/* commit_list and tc_heuristic_recover cannot both be set */
442
assert(commit_list==0 || tc_heuristic_recover==0);
444
/* if either is set, total_ha_2pc must be set too */
445
if (total_ha_2pc <= 1)
449
#ifndef WILL_BE_DELETED_LATER
452
for now, only InnoDB supports 2pc. It means we can always safely
453
rollback all pending transactions, without risking inconsistent data
456
assert(total_ha_2pc == 2); // only InnoDB and binlog
457
tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
460
for (trans_len= MAX_XID_LIST_SIZE ;
461
trans_list==0 && trans_len > MIN_XID_LIST_SIZE; trans_len/=2)
463
trans_list=(XID *)malloc(trans_len*sizeof(XID));
467
errmsg_printf(ERRMSG_LVL_ERROR, ER(ER_OUTOFMEMORY), trans_len*sizeof(XID));
472
errmsg_printf(ERRMSG_LVL_INFO, _("Starting crash recovery..."));
475
XARecover recover_func(trans_list, trans_len, commit_list, dry_run);
476
for_each(vector_of_transactional_engines.begin(), vector_of_transactional_engines.end(),
480
if (recover_func.getForeignXIDs())
481
errmsg_printf(ERRMSG_LVL_WARN,
482
_("Found %d prepared XA transactions"),
483
recover_func.getForeignXIDs());
484
if (dry_run && recover_func.getMyXIDs())
486
errmsg_printf(ERRMSG_LVL_ERROR,
487
_("Found %d prepared transactions! It means that drizzled "
488
"was not shut down properly last time and critical "
489
"recovery information (last binlog or %s file) was "
490
"manually deleted after a crash. You have to start "
491
"drizzled with the --tc-heuristic-recover switch to "
492
"commit or rollback pending transactions."),
493
recover_func.getMyXIDs(), opt_tc_log_file);
497
errmsg_printf(ERRMSG_LVL_INFO, _("Crash recovery finished."));
501
int plugin::StorageEngine::startConsistentSnapshot(Session *session)
503
for_each(vector_of_engines.begin(), vector_of_engines.end(),
504
bind2nd(mem_fun(&plugin::StorageEngine::start_consistent_snapshot),
509
class StorageEngineGetTableDefinition: public unary_function<plugin::StorageEngine *,bool>
514
const char *table_name;
516
message::Table *table_proto;
520
StorageEngineGetTableDefinition(Session& session_arg,
521
const char* path_arg,
523
const char *table_name_arg,
524
const bool is_tmp_arg,
525
message::Table *table_proto_arg,
527
session(session_arg),
530
table_name(table_name_arg),
532
table_proto(table_proto_arg),
535
result_type operator() (argument_type engine)
537
int ret= engine->doGetTableDefinition(session,
547
return *err == EEXIST;
551
static int drizzle_read_table_proto(const char* path, message::Table* table)
553
int fd= open(path, O_RDONLY);
558
google::protobuf::io::ZeroCopyInputStream* input=
559
new google::protobuf::io::FileInputStream(fd);
561
if (table->ParseFromZeroCopyStream(input) == false)
574
Call this function in order to give the Cursor the possibility
575
to ask engine if there are any new tables that should be written to disk
576
or any dropped tables that need to be removed from disk
578
int plugin::StorageEngine::getTableDefinition(Session& session,
583
message::Table *table_proto)
587
vector<plugin::StorageEngine *>::iterator iter=
588
find_if(vector_of_engines.begin(), vector_of_engines.end(),
589
StorageEngineGetTableDefinition(session, path, NULL, NULL, true, table_proto, &err));
591
if (iter == vector_of_engines.end())
593
string proto_path(path);
594
string file_ext(".dfe");
595
proto_path.append(file_ext);
597
int error= access(proto_path.c_str(), F_OK);
606
int read_proto_err= drizzle_read_table_proto(proto_path.c_str(),
618
An interceptor to hijack the text of the error message without
619
setting an error in the thread. We need the text to present it
620
in the form of a warning to the user.
623
class Ha_delete_table_error_handler: public Internal_error_handler
626
Ha_delete_table_error_handler() : Internal_error_handler() {}
627
virtual bool handle_error(uint32_t sql_errno,
629
DRIZZLE_ERROR::enum_warning_level level,
631
char buff[DRIZZLE_ERRMSG_SIZE];
636
Ha_delete_table_error_handler::
637
handle_error(uint32_t ,
639
DRIZZLE_ERROR::enum_warning_level ,
642
/* Grab the error message */
643
strncpy(buff, message, sizeof(buff)-1);
649
This should return ENOENT if the file doesn't exist.
650
The .frm file will be deleted only if we return 0 or ENOENT
652
int plugin::StorageEngine::dropTable(Session& session, const char *path,
653
const char *db, const char *alias,
654
bool generate_warning)
658
message::Table src_proto;
659
plugin::StorageEngine* engine;
661
error_proto= plugin::StorageEngine::getTableDefinition(session,
668
engine= plugin::StorageEngine::findByName(session,
669
src_proto.engine().name());
673
engine->setTransactionReadWrite(session);
674
error= engine->doDropTable(session, path);
681
if (engine && engine->check_flag(HTON_BIT_HAS_DATA_DICTIONARY))
682
delete_table_proto_file(path);
684
error= delete_table_proto_file(path);
688
if (error_proto && error == 0)
691
if (error && generate_warning)
693
TableShare dummy_share;
699
if ((file= engine->create(dummy_share, session.mem_root)))
702
memset(&dummy_table, 0, sizeof(dummy_table));
703
memset(&dummy_share, 0, sizeof(dummy_share));
704
dummy_table.s= &dummy_share;
707
Because file->print_error() uses my_error() to generate the error message
708
we use an internal error handler to intercept it and store the text
709
in a temporary buffer. Later the message will be presented to user
712
Ha_delete_table_error_handler ha_delete_table_error_handler;
714
/* Fill up structures that print_error may need */
715
dummy_share.path.str= (char*) path;
716
dummy_share.path.length= strlen(path);
717
dummy_share.db.str= (char*) db;
718
dummy_share.db.length= strlen(db);
719
dummy_share.table_name.str= (char*) alias;
720
dummy_share.table_name.length= strlen(alias);
721
dummy_table.alias= alias;
725
file->change_table_ptr(&dummy_table, &dummy_share);
727
session.push_internal_handler(&ha_delete_table_error_handler);
728
file->print_error(error, 0);
730
session.pop_internal_handler();
733
error= -1; /* General form of fail. maybe bad FRM */
736
XXX: should we convert *all* errors to warnings here?
737
What if the error is fatal?
739
push_warning(&session, DRIZZLE_ERROR::WARN_LEVEL_ERROR, error,
740
ha_delete_table_error_handler.buff);
747
Initiates table-file and calls appropriate database-creator.
754
int plugin::StorageEngine::createTable(Session& session, const char *path,
755
const char *db, const char *table_name,
756
HA_CREATE_INFO& create_info,
757
bool update_create_info,
758
drizzled::message::Table& table_proto, bool proto_used)
762
TableShare share(db, 0, table_name, path);
763
message::Table tmp_proto;
767
if (parse_table_proto(session, table_proto, &share))
772
if (open_table_def(session, &share))
776
if (open_table_from_share(&session, &share, "", 0, (uint32_t) READ_ALL, 0,
780
if (update_create_info)
781
table.updateCreateInfo(&create_info, &table_proto);
784
char name_buff[FN_REFLEN];
785
const char *table_name_arg;
787
table_name_arg= share.storage_engine->checkLowercaseNames(path, name_buff);
789
share.storage_engine->setTransactionReadWrite(session);
791
error= share.storage_engine->doCreateTable(&session, table_name_arg, table,
792
create_info, table_proto);
795
table.closefrm(false);
798
char name_buff[FN_REFLEN];
799
sprintf(name_buff,"%s.%s",db,table_name);
800
my_error(ER_CANT_CREATE_TABLE, MYF(ME_BELL+ME_WAITTANG), name_buff, error);
803
share.free_table_share();
807
Cursor *plugin::StorageEngine::getCursor(TableShare &share, MEM_ROOT *alloc)
813
if ((file= create(share, alloc)))
819
TODO -> Remove this to force all engines to implement their own file. Solves the "we only looked at dfe" problem.
821
void plugin::StorageEngine::doGetTableNames(CachedDirectory &directory, string&, set<string>& set_of_names)
823
CachedDirectory::Entries entries= directory.getEntries();
825
for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
826
entry_iter != entries.end(); ++entry_iter)
828
CachedDirectory::Entry *entry= *entry_iter;
829
string *filename= &entry->filename;
831
assert(filename->size());
833
const char *ext= strchr(filename->c_str(), '.');
835
if (ext == NULL || my_strcasecmp(system_charset_info, ext, DEFAULT_DEFINITION_FILE_EXT.c_str()) ||
836
is_prefix(filename->c_str(), TMP_FILE_PREFIX))
840
char uname[NAME_LEN + 1];
841
uint32_t file_name_len;
843
file_name_len= filename_to_tablename(filename->c_str(), uname, sizeof(uname));
844
// TODO: Remove need for memory copy here
845
uname[file_name_len - sizeof(".dfe") + 1]= '\0'; // Subtract ending, place NULL
846
set_of_names.insert(uname);
852
public unary_function<plugin::StorageEngine *, void>
855
CachedDirectory& directory;
856
set<string>& set_of_names;
860
AddTableName(CachedDirectory& directory_arg, string& database_name, set<string>& of_names) :
861
directory(directory_arg),
862
set_of_names(of_names)
867
result_type operator() (argument_type engine)
869
engine->doGetTableNames(directory, db, set_of_names);
873
void plugin::StorageEngine::getTableNames(string& db, set<string>& set_of_names)
875
char tmp_path[FN_REFLEN];
877
build_table_filename(tmp_path, sizeof(tmp_path), db.c_str(), "", false);
879
CachedDirectory directory(tmp_path, set_of_table_definition_ext);
881
if (db.compare("information_schema"))
883
if (directory.fail())
885
my_errno= directory.getError();
886
if (my_errno == ENOENT)
887
my_error(ER_BAD_DB_ERROR, MYF(ME_BELL+ME_WAITTANG), db.c_str());
889
my_error(ER_CANT_READ_DIR, MYF(ME_BELL+ME_WAITTANG), directory.getPath(), my_errno);
894
for_each(vector_of_engines.begin(), vector_of_engines.end(),
895
AddTableName(directory, db, set_of_names));
898
/* This will later be converted to TableIdentifiers */
899
class DropTables: public unary_function<plugin::StorageEngine *, void>
902
set<string>& set_of_names;
906
DropTables(Session &session_arg, set<string>& of_names) :
907
session(session_arg),
908
set_of_names(of_names)
911
result_type operator() (argument_type engine)
914
for (set<string>::iterator iter= set_of_names.begin();
915
iter != set_of_names.end();
918
int error= engine->doDropTable(session, *iter);
920
// On a return of zero we know we found and deleted the table. So we
921
// remove it from our search.
923
set_of_names.erase(iter);
929
This only works for engines which use file based DFE.
931
Note-> Unlike MySQL, we do not, on purpose, delete files that do not match any engines.
933
void plugin::StorageEngine::removeLostTemporaryTables(Session &session, const char *directory)
935
CachedDirectory dir(directory, set_of_table_definition_ext);
936
set<string> set_of_table_names;
940
my_errno= dir.getError();
941
my_error(ER_CANT_READ_DIR, MYF(0), directory, my_errno);
946
CachedDirectory::Entries files= dir.getEntries();
948
for (CachedDirectory::Entries::iterator fileIter= files.begin();
949
fileIter != files.end(); fileIter++)
953
CachedDirectory::Entry *entry= *fileIter;
955
/* We remove the file extension. */
956
length= entry->filename.length();
957
entry->filename.resize(length - DEFAULT_DEFINITION_FILE_EXT.length());
961
path+= entry->filename;
962
set_of_table_names.insert(path);
965
for_each(vector_of_engines.begin(), vector_of_engines.end(),
966
DropTables(session, set_of_table_names));
969
Now we just clean up anything that might be left over.
971
We rescan because some of what might have been there should
972
now be all nice and cleaned up.
974
set<string> all_exts= set_of_table_definition_ext;
976
for (vector<plugin::StorageEngine *>::iterator iter= vector_of_engines.begin();
977
iter != vector_of_engines.end() ; iter++)
979
for (const char **ext= (*iter)->bas_ext(); *ext ; ext++)
980
all_exts.insert(*ext);
983
CachedDirectory rescan(directory, all_exts);
985
files= rescan.getEntries();
986
for (CachedDirectory::Entries::iterator fileIter= files.begin();
987
fileIter != files.end(); fileIter++)
990
CachedDirectory::Entry *entry= *fileIter;
994
path+= entry->filename;
996
unlink(path.c_str());
1001
} /* namespace drizzled */