~drizzle-trunk/drizzle/development : revision 575.2.2

103

104

/**

105

Return the storage engine handlerton for the supplied name

106

107

@param session current thread

108

@param name name of storage engine

109

110

@return

111

pointer to storage engine plugin handle

112

*/

127

handlerton *hton= plugin_data(plugin, handlerton *);

128

if (!(hton->flags & HTON_NOT_USER_SELECTABLE))

129

return plugin;

130

131

/*

132

unlocking plugin immediately after locking is relatively low cost.

133

*/

1305

#endif

1306

1307

1308

for (info.len= MAX_XID_LIST_SIZE ;

1308

for (info.len= MAX_XID_LIST_SIZE ;

1309

info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)

1310

{

1311

info.list=(XID *)my_malloc(info.len*sizeof(XID), MYF(0));

1316

return(1);

1317

}

1318

1319

plugin_foreach(NULL, xarecover_handlerton,

1319

plugin_foreach(NULL, xarecover_handlerton,

1320

DRIZZLE_STORAGE_ENGINE_PLUGIN, &info);

1321

1322

free((unsigned char*)info.list);

1323

if (info.found_foreign_xids)

1324

sql_print_warning(_("Found %d prepared XA transactions"),

1324

sql_print_warning(_("Found %d prepared XA transactions"),

1325

info.found_foreign_xids);

1326

if (info.dry_run && info.found_my_xids)

1327

{

1419

1420

int ha_release_temporary_latches(Session *session)

1421

{

1422

plugin_foreach(session, release_temporary_latches, DRIZZLE_STORAGE_ENGINE_PLUGIN,

1422

plugin_foreach(session, release_temporary_latches, DRIZZLE_STORAGE_ENGINE_PLUGIN,

1423

NULL);

1424

1425

return 0;

1569

void *arg __attribute__((unused)))

1570

{

1571

handlerton *hton= plugin_data(plugin, handlerton *);

1572

if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&

1572

if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&

1573

hton->flush_logs(hton))

1574

return true;

1575

return false;

1710

handler *new_handler= get_new_handler(table->s, mem_root, table->s->db_type());

1711

/*

1712

Allocate handler->ref here because otherwise ha_open will allocate it

1713

on this->table->mem_root and we will not be able to reclaim that memory

1713

on this->table->mem_root and we will not be able to reclaim that memory

1714

when the clone handler object is destroyed.

1715

*/

1716

if (!(new_handler->ref= (unsigned char*) alloc_root(mem_root, ALIGN_SIZE(ref_length)*2)))

1776

(void) extra(HA_EXTRA_NO_READCHECK); // Not needed in SQL

1777

1778

/* ref is already allocated for us if we're called from handler::clone() */

1779

if (!ref && !(ref= (unsigned char*) alloc_root(&table->mem_root,

1779

if (!ref && !(ref= (unsigned char*) alloc_root(&table->mem_root,

1780

ALIGN_SIZE(ref_length)*2)))

1781

{

1782

close();

2053

/* avoid overflow in formula, with this if() */

2054

if (nb_already_reserved_intervals <= AUTO_INC_DEFAULT_NB_MAX_BITS)

2055

{

2056

nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *

2056

nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *

2057

(1 << nb_already_reserved_intervals);

2058

set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);

2059

}

2067

&nb_reserved_values);

2068

if (nr == ~(uint64_t) 0)

2069

return(HA_ERR_AUTOINC_READ_FAILED); // Mark failure

2070

2071

/*

2072

That rounding below should not be needed when all engines actually

2073

respect offset and increment in get_auto_increment(). But they don't

2078

*/

2079

nr= compute_next_insert_id(nr-1, variables);

2080

}

2081

2082

if (table->s->next_number_keypart == 0)

2083

{

2084

/* We must defer the appending until "nr" has been possibly truncated */

3050

char name_buff[FN_REFLEN];

3051

const char *name;

3052

TABLE_SHARE share;

3053

3054

init_tmp_table_share(session, &share, db, 0, table_name, path);

3055

if (open_table_def(session, &share, 0) ||

3056

open_table_from_share(session, &share, "", 0, (uint) READ_ALL, 0, &table,

3248

{

3249

const char *db;

3250

const char *name;

3251

unsigned char **frmblob;

3251

unsigned char **frmblob;

3252

size_t *frmlen;

3253

};

3254

3258

st_discover_args *vargs= (st_discover_args *)arg;

3259

handlerton *hton= plugin_data(plugin, handlerton *);

3260

if (hton->state == SHOW_OPTION_YES && hton->discover &&

3261

(!(hton->discover(hton, session, vargs->db, vargs->name,

3262

vargs->frmblob,

3261

(!(hton->discover(hton, session, vargs->db, vargs->name,

3262

vargs->frmblob,

3263

vargs->frmlen))))

3264

return true;

3265

3419

ha_rows rows, total_rows= 0;

3420

uint32_t n_ranges=0;

3421

Session *session= current_session;

3422

3423

/* Default MRR implementation doesn't need buffer */

3424

*bufsz= 0;

3425

3428

{

3429

if (unlikely(session->killed != 0))

3430

return HA_POS_ERROR;

3431

3432

n_ranges++;

3433

key_range *min_endp, *max_endp;

3434

{

3439

rows= 1; /* there can be at most one row */

3440

else

3441

{

3442

if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,

3442

if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,

3443

max_endp)))

3444

{

3445

/* Can't scan one range => can't do MRR scan at all */

3449

}

3450

total_rows += rows;

3451

}

3452

3453

if (total_rows != HA_POS_ERROR)

3454

{

3455

/* The following calculation is the same as in multi_range_read_info(): */

3522

/**

3523

Initialize the MRR scan

3524

3525

Initialize the MRR scan. This function may do heavyweight scan

3525

Initialize the MRR scan. This function may do heavyweight scan

3526

initialization like row prefetching/sorting/etc (NOTE: but better not do

3527

it here as we may not need it, e.g. if we never satisfy WHERE clause on

3528

previous tables. For many implementations it would be natural to do such

3529

initializations in the first multi_range_read_next() call)

3530

3531

mode is a combination of the following flags: HA_MRR_SORTED,

3532

HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION

3532

HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION

3533

3534

@param seq Range sequence to be traversed

3535

@param seq_init_param First parameter for seq->init()

3541

One must have called index_init() before calling this function. Several

3542

multi_range_read_init() calls may be made in course of one query.

3543

3544

Until WL#2623 is done (see its text, section 3.2), the following will

3544

Until WL#2623 is done (see its text, section 3.2), the following will

3545

also hold:

3546

The caller will guarantee that if "seq->init == mrr_ranges_array_init"

3547

then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.

3548

This property will only be used by NDB handler until WL#2623 is done.

3549

3550

Buffer memory management is done according to the following scenario:

3551

The caller allocates the buffer and provides it to the callee by filling

3552

the members of HANDLER_BUFFER structure.

3641

3642

3643

/* **************************************************************************

3644

* DS-MRR implementation

3644

* DS-MRR implementation

3645

***************************************************************************/

3646

3647

/**

3684

3685

is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);

3686

rowids_buf_end= buf->buffer_end;

3687

3688

elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);

3689

rowids_buf_last= rowids_buf +

3689

rowids_buf_last= rowids_buf +

3690

((rowids_buf_end - rowids_buf)/ elem_size)*

3691

elem_size;

3692

rowids_buf_end= rowids_buf_last;

3693

3694

/* Create a separate handler object to do rndpos() calls. */

3695

Session *session= current_session;

3696

if (!(new_h2= h->clone(session->mem_root)) ||

3696

if (!(new_h2= h->clone(session->mem_root)) ||

3697

new_h2->ha_external_lock(session, F_RDLCK))

3698

{

3699

delete new_h2;

3712

table->prepare_for_position();

3713

new_h2->extra(HA_EXTRA_KEYREAD);

3714

3715

if (h2->ha_index_init(keyno, false) ||

3715

if (h2->ha_index_init(keyno, false) ||

3716

h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,

3717

mode, buf))

3718

goto error;

3719

use_default_impl= false;

3720

3721

if (pushed_cond)

3722

h2->idx_cond_push(keyno, pushed_cond);

3723

if (dsmrr_fill_buffer(new_h2))

3727

If the above call has scanned through all intervals in *seq, then

3728

adjust *buf to indicate that the remaining buffer space will not be used.

3729

*/

3730

if (dsmrr_eof)

3730

if (dsmrr_eof)

3731

buf->end_of_used_area= rowids_buf_last;

3732

3733

if (h->ha_rnd_init(false))

3734

goto error;

3735

3736

return(0);

3737

error:

3738

h2->ha_index_or_rnd_end();

3767

DS-MRR: Fill the buffer with rowids and sort it by rowid

3768

3769

{This is an internal function of DiskSweep MRR implementation}

3770

Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into

3771

buffer. When the buffer is full or scan is completed, sort the buffer by

3770

Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into

3771

buffer. When the buffer is full or scan is completed, sort the buffer by

3772

rowid and return.

3773

3774

The function assumes that rowids buffer is empty when it is invoked.

3775

3773

3774

The function assumes that rowids buffer is empty when it is invoked.

3775

3776

@param h Table handler

3777

3778

@retval 0 OK, the next portion of rowids is in the buffer,

3786

int res = 0;

3787

3788

rowids_buf_cur= rowids_buf;

3789

while ((rowids_buf_cur < rowids_buf_end) &&

3789

while ((rowids_buf_cur < rowids_buf_end) &&

3790

!(res= h2->handler::multi_range_read_next(&range_info)))

3791

{

3792

/* Put rowid, or {rowid, range_id} pair into the buffer */

3802

}

3803

3804

if (res && res != HA_ERR_END_OF_FILE)

3805

return(res);

3805

return(res);

3806

dsmrr_eof= test(res == HA_ERR_END_OF_FILE);

3807

3808

/* Sort the buffer contents by rowid */

3809

uint32_t elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);

3810

uint32_t n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;

3811

3812

my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,

3813

(void*)h);

3814

rowids_buf_last= rowids_buf_cur;

3824

int DsMrr_impl::dsmrr_next(handler *h, char **range_info)

3825

{

3826

int res;

3827

3828

if (use_default_impl)

3829

return h->handler::multi_range_read_next(range_info);

3830

3831

if (rowids_buf_cur == rowids_buf_last)

3832

{

3833

if (dsmrr_eof)

3839

if (res)

3840

goto end;

3841

}

3842

3843

/* Return EOF if there are no rowids in the buffer after re-fill attempt */

3844

if (rowids_buf_cur == rowids_buf_last)

3845

{

3867

*/

3868

int DsMrr_impl::dsmrr_info(uint32_t keyno, uint32_t n_ranges, uint32_t rows, uint32_t *bufsz,

3869

uint32_t *flags, COST_VECT *cost)

3870

{

3870

{

3871

int res;

3872

uint32_t def_flags= *flags;

3873

uint32_t def_bufsz= *bufsz;

3877

&def_flags, cost);

3878

assert(!res);

3879

3880

if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||

3880

if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||

3881

choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost))

3882

{

3883

/* Default implementation is chosen */

3893

*/

3894

3895

ha_rows DsMrr_impl::dsmrr_info_const(uint32_t keyno, RANGE_SEQ_IF *seq,

3896

void *seq_init_param, uint32_t n_ranges,

3896

void *seq_init_param, uint32_t n_ranges,

3897

uint32_t *bufsz, uint32_t *flags, COST_VECT *cost)

3898

{

3899

ha_rows rows;

3901

uint32_t def_bufsz= *bufsz;

3902

/* Get cost/flags/mem_usage of default MRR implementation */

3903

rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,

3904

n_ranges, &def_bufsz,

3904

n_ranges, &def_bufsz,

3905

&def_flags, cost);

3906

if (rows == HA_POS_ERROR)

3907

{

3988

COST_VECT dsmrr_cost;

3989

bool res;

3990

Session *session= current_session;

3991

if ((session->variables.optimizer_use_mrr == 2) ||

3991

if ((session->variables.optimizer_use_mrr == 2) ||

3992

(*flags & HA_MRR_INDEX_ONLY) || (*flags & HA_MRR_SORTED) ||

3993

(keyno == table->s->primary_key &&

3994

h->primary_key_is_clustered()) ||

3993

(keyno == table->s->primary_key &&

3994

h->primary_key_is_clustered()) ||

3995

key_uses_partial_cols(keyno))

3996

{

3997

/* Use the default implementation */

3998

*flags |= HA_MRR_USE_DEFAULT_IMPL;

3999

return true;

4000

}

4001

4002

uint32_t add_len= table->key_info[keyno].key_length + h->ref_length;

4001

4002

uint32_t add_len= table->key_info[keyno].key_length + h->ref_length;

4003

*bufsz -= add_len;

4004

if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))

4005

return true;

4006

*bufsz += add_len;

4007

4008

bool force_dsmrr;

4009

/*

4009

/*

4010

If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of

4011

DS-MRR and Default implementations cost. This allows one to force use of

4012

DS-MRR whenever it is applicable without affecting other cost-based

4065

4066

/* Number of iterations we'll make with full buffer */

4067

n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);

4068

4069

/*

4070

Get numbers of rows we'll be processing in

4071

- non-last sweep, with full buffer

4068

4069

/*

4070

Get numbers of rows we'll be processing in

4071

- non-last sweep, with full buffer

4072

- last iteration, with non-full buffer

4073

*/

4074

rows_in_full_step= max_buff_entries;

4075

rows_in_last_step= rows % max_buff_entries;

4076

4077

/* Adjust buffer size if we expect to use only part of the buffer */

4078

if (n_full_steps)

4079

{

4083

else

4084

{

4085

cost->zero();

4086

*buffer_size= cmax((ulong)*buffer_size,

4087

(size_t)(1.2*rows_in_last_step) * elem_size +

4086

*buffer_size= cmax((ulong)*buffer_size,

4087

(size_t)(1.2*rows_in_last_step) * elem_size +

4088

h->ref_length + table->key_info[keynr].key_length);

4089

}

4090

4091

COST_VECT last_step_cost;

4092

get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);

4093

cost->add(&last_step_cost);

4094

4095

if (n_full_steps != 0)

4096

cost->mem_cost= *buffer_size;

4097

else

4098

cost->mem_cost= (double)rows_in_last_step * elem_size;

4099

4100

/* Total cost of all index accesses */

4101

index_read_cost= h->index_only_read_time(keynr, (double)rows);

4102

cost->add_io(index_read_cost, 1 /* Random seeks */);

4104

}

4105

4106

4107

/*

4107

/*

4108

Get cost of one sort-and-sweep step

4109

4110

SYNOPSIS

4119

- read #nrows records from table in a sweep.

4120

*/

4121

4122

static

4122

static

4123

void get_sort_and_sweep_cost(Table *table, ha_rows nrows, COST_VECT *cost)

4124

{

4125

if (nrows)

4153

Time to move the disk head is proportional to head travel distance.

4154

4155

Time to wait for the plate to rotate depends on whether the disk head

4156

was moved or not.

4156

was moved or not.

4157

4158

If disk head wasn't moved, the wait time is proportional to distance

4159

between the previous block and the block we're reading.

4164

4165

Our cost units are "random disk seeks". The cost of random disk seek is

4166

actually not a constant, it depends on the range of cylinders we're going

4167

to access. We make it constant by introducing a fuzzy concept of "typical

4167

to access. We make it constant by introducing a fuzzy concept of "typical

4168

datafile length" (it's fuzzy as it's hard to tell whether it should

4169

include index file, temp.tables etc). Then random seek cost is:

4170

4179

@param cost OUT The cost.

4180

*/

4181

4182

void get_sweep_read_cost(Table *table, ha_rows nrows, bool interrupted,

4182

void get_sweep_read_cost(Table *table, ha_rows nrows, bool interrupted,

4183

COST_VECT *cost)

4184

{

4185

cost->zero();

4258

start_key->keypart_map,

4259

start_key->flag);

4260

if (result)

4261

return((result == HA_ERR_KEY_NOT_FOUND)

4261

return((result == HA_ERR_KEY_NOT_FOUND)

4262

? HA_ERR_END_OF_FILE

4263

: result);

4264

4526

continue;

4527

4528

Table **const end_ptr= lock->table + lock->table_count;

4529

for (Table **table_ptr= lock->table ;

4529

for (Table **table_ptr= lock->table ;

4530

table_ptr != end_ptr ;

4531

++table_ptr)

4532

{