Viewing changes to storage/innobase/row/row0merge.c

Tags: innodb-plugin-1.0.1
Imported 1.0.1 with clean - with no changes.

/******************************************************
New index creation routines using a merge sort

(c) 2005,2007 Innobase Oy

Created 12/4/2005 Jan Lindstrom
Completed by Sunny Bains and Marko Makela
*******************************************************/

#include "row0merge.h"
#include "row0ext.h"
#include "row0row.h"
#include "row0upd.h"
#include "row0ins.h"
#include "row0sel.h"
#include "dict0dict.h"
#include "dict0mem.h"
#include "dict0boot.h"
#include "dict0crea.h"
#include "dict0load.h"
#include "btr0btr.h"
#include "mach0data.h"
#include "trx0rseg.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "trx0undo.h"
#include "trx0purge.h"
#include "trx0rec.h"
#include "que0que.h"
#include "rem0cmp.h"
#include "read0read.h"
#include "os0file.h"
#include "lock0lock.h"
#include "data0data.h"
#include "data0type.h"
#include "que0que.h"
#include "pars0pars.h"
#include "mem0mem.h"
#include "log0log.h"
#include "ut0sort.h"
#include "handler0alter.h"

#ifdef UNIV_DEBUG
/* Set these in order to enable debug printout. */
static ibool    row_merge_print_cmp;
static ibool    row_merge_print_read;
static ibool    row_merge_print_write;
#endif /* UNIV_DEBUG */

/* Block size for I/O operations in merge sort.  The minimum is
UNIV_PAGE_SIZE, or page_get_free_space_of_empty() rounded to a power of 2.

When not creating a PRIMARY KEY that contains column prefixes, this
can be set as small as UNIV_PAGE_SIZE / 2.  See the comment above
ut_ad(data_size < sizeof(row_merge_block_t)). */

typedef byte    row_merge_block_t[1048576];

/* Secondary buffer for I/O operations of merge records.  This buffer
is used for writing or reading a record that spans two row_merge_block_t.
Thus, it must be able to hold one merge record, whose maximum size is
the same as the minimum size of row_merge_block_t. */

typedef byte    mrec_buf_t[UNIV_PAGE_SIZE];

/* Merge record in row_merge_block_t.  The format is the same as a
record in ROW_FORMAT=COMPACT with the exception that the
REC_N_NEW_EXTRA_BYTES are omitted. */
typedef byte    mrec_t;
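
/* A merge file is, roughly, a sequence of row_merge_block_t.  Within a
block, the records of one sorted list are stored back to back: each
record is preceded by its encoded extra_size (one byte, or two bytes
when extra_size + 1 >= 0x80), and the list is terminated by a 0 byte.
A record that does not fit at the end of a block continues in the next
block; row_merge_read_rec() and row_merge_write_rec() below use
mrec_buf_t to reassemble or split such records. */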
 

/* Buffer for sorting in main memory. */
struct row_merge_buf_struct {
        mem_heap_t*     heap;           /* memory heap where allocated */
        dict_index_t*   index;          /* the index the tuples belong to */
        ulint           total_size;     /* total amount of data bytes */
        ulint           n_tuples;       /* number of data tuples */
        ulint           max_tuples;     /* maximum number of data tuples */
        const dfield_t**tuples;         /* array of pointers to
                                        arrays of fields that form
                                        the data tuples */
        const dfield_t**tmp_tuples;     /* temporary copy of tuples,
                                        for sorting */
};

typedef struct row_merge_buf_struct row_merge_buf_t;

/* Information about the temporary files used in merge sort is stored
in this structure. */

struct merge_file_struct {
        int     fd;             /* File descriptor */
        ulint   offset;         /* File offset */
};

typedef struct merge_file_struct merge_file_t;

#ifdef UNIV_DEBUG
/**********************************************************
Display a merge tuple. */
static
void
row_merge_tuple_print(
/*==================*/
        FILE*           f,      /* in: output stream */
        const dfield_t* entry,  /* in: tuple to print */
        ulint           n_fields)/* in: number of fields in the tuple */
{
        ulint   j;

        for (j = 0; j < n_fields; j++) {
                const dfield_t* field = &entry[j];

                if (dfield_is_null(field)) {
                        fputs("\n NULL;", f);
                } else {
                        ulint   field_len       = dfield_get_len(field);
                        ulint   len             = ut_min(field_len, 20);
                        if (dfield_is_ext(field)) {
                                fputs("\nE", f);
                        } else {
                                fputs("\n ", f);
                        }
                        ut_print_buf(f, dfield_get_data(field), len);
                        if (len != field_len) {
                                fprintf(f, " (total %lu bytes)", field_len);
                        }
                }
        }
        putc('\n', f);
}
#endif /* UNIV_DEBUG */

/**********************************************************
Allocate a sort buffer. */
static
row_merge_buf_t*
row_merge_buf_create_low(
/*=====================*/
                                        /* out,own: sort buffer */
        mem_heap_t*     heap,           /* in: heap where allocated */
        dict_index_t*   index,          /* in: secondary index */
        ulint           max_tuples,     /* in: maximum number of data tuples */
        ulint           buf_size)       /* in: size of the buffer, in bytes */
{
        row_merge_buf_t*        buf;

        ut_ad(max_tuples > 0);
        ut_ad(max_tuples <= sizeof(row_merge_block_t));
        ut_ad(max_tuples < buf_size);

        buf = mem_heap_zalloc(heap, buf_size);
        buf->heap = heap;
        buf->index = index;
        buf->max_tuples = max_tuples;
        buf->tuples = mem_heap_alloc(heap,
                                     2 * max_tuples * sizeof *buf->tuples);
        buf->tmp_tuples = buf->tuples + max_tuples;

        return(buf);
}

/**********************************************************
Allocate a sort buffer. */
static
row_merge_buf_t*
row_merge_buf_create(
/*=================*/
                                /* out,own: sort buffer */
        dict_index_t*   index)  /* in: secondary index */
{
        row_merge_buf_t*        buf;
        ulint                   max_tuples;
        ulint                   buf_size;
        mem_heap_t*             heap;

        max_tuples = sizeof(row_merge_block_t)
                / ut_max(1, dict_index_get_min_size(index));

        buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;

        heap = mem_heap_create(buf_size + sizeof(row_merge_block_t));

        buf = row_merge_buf_create_low(heap, index, max_tuples, buf_size);

        return(buf);
}
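
/* Illustrative sizing example (the numbers are not from this file):
with the 1 MiB row_merge_block_t above and an index whose minimum
record size (dict_index_get_min_size()) is 20 bytes, max_tuples would
be 1048576 / 20 = 52428 (integer division), and the heap is created
large enough for roughly the buffer struct with its tuple pointer
array plus one full block of tuple data. */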
 

/**********************************************************
Empty a sort buffer. */
static
row_merge_buf_t*
row_merge_buf_empty(
/*================*/
                                        /* out: sort buffer */
        row_merge_buf_t*        buf)    /* in,own: sort buffer */
{
        ulint           buf_size;
        ulint           max_tuples      = buf->max_tuples;
        mem_heap_t*     heap            = buf->heap;
        dict_index_t*   index           = buf->index;

        buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;

        mem_heap_empty(heap);

        return(row_merge_buf_create_low(heap, index, max_tuples, buf_size));
}

/**********************************************************
Deallocate a sort buffer. */
static
void
row_merge_buf_free(
/*===============*/
        row_merge_buf_t*        buf)    /* in,own: sort buffer, to be freed */
{
        mem_heap_free(buf->heap);
}

/**********************************************************
Insert a data tuple into a sort buffer. */
static
ibool
row_merge_buf_add(
/*==============*/
                                        /* out: TRUE if added,
                                        FALSE if out of space */
        row_merge_buf_t*        buf,    /* in/out: sort buffer */
        const dtuple_t*         row,    /* in: row in clustered index */
        const row_ext_t*        ext)    /* in: cache of externally stored
                                        column prefixes, or NULL */
{
        ulint                   i;
        ulint                   n_fields;
        ulint                   data_size;
        ulint                   extra_size;
        const dict_index_t*     index;
        dfield_t*               entry;
        dfield_t*               field;

        if (buf->n_tuples >= buf->max_tuples) {
                return(FALSE);
        }

        UNIV_PREFETCH_R(row->fields);

        index = buf->index;

        n_fields = dict_index_get_n_fields(index);

        entry = mem_heap_alloc(buf->heap, n_fields * sizeof *entry);
        buf->tuples[buf->n_tuples] = entry;
        field = entry;

        data_size = 0;
        extra_size = UT_BITS_IN_BYTES(index->n_nullable);

        for (i = 0; i < n_fields; i++, field++) {
                const dict_field_t*     ifield;
                const dict_col_t*       col;
                ulint                   col_no;
                const dfield_t*         row_field;
                ulint                   len;

                ifield = dict_index_get_nth_field(index, i);
                col = ifield->col;
                col_no = dict_col_get_no(col);
                row_field = dtuple_get_nth_field(row, col_no);
                dfield_copy(field, row_field);
                len = dfield_get_len(field);

                if (dfield_is_null(field)) {
                        ut_ad(!(col->prtype & DATA_NOT_NULL));
                        continue;
                } else if (UNIV_LIKELY(!ext)) {
                } else if (dict_index_is_clust(index)) {
                        /* Flag externally stored fields. */
                        const byte*     buf = row_ext_lookup(ext, col_no,
                                                             &len);
                        if (UNIV_LIKELY_NULL(buf)) {
                                ut_a(buf != field_ref_zero);
                                if (i < dict_index_get_n_unique(index)) {
                                        dfield_set_data(field, buf, len);
                                } else {
                                        dfield_set_ext(field);
                                        len = dfield_get_len(field);
                                }
                        }
                } else {
                        const byte*     buf = row_ext_lookup(ext, col_no,
                                                             &len);
                        if (UNIV_LIKELY_NULL(buf)) {
                                ut_a(buf != field_ref_zero);
                                dfield_set_data(field, buf, len);
                        }
                }

                /* If a column prefix index, take only the prefix */

                if (ifield->prefix_len) {
                        len = dtype_get_at_most_n_mbchars(
                                col->prtype,
                                col->mbminlen, col->mbmaxlen,
                                ifield->prefix_len,
                                len, dfield_get_data(field));
                        dfield_set_len(field, len);
                }

                ut_ad(len <= col->len || col->mtype == DATA_BLOB);

                if (ifield->fixed_len) {
                        ut_ad(len == ifield->fixed_len);
                        ut_ad(!dfield_is_ext(field));
                } else if (dfield_is_ext(field)) {
                        extra_size += 2;
                } else if (len < 128
                           || (col->len < 256 && col->mtype != DATA_BLOB)) {
                        extra_size++;
                } else {
                        /* For variable-length columns, we look up the
                        maximum length from the column itself.  If this
                        is a prefix index column shorter than 256 bytes,
                        this will waste one byte. */
                        extra_size += 2;
                }
                data_size += len;
        }

#ifdef UNIV_DEBUG
        {
                ulint   size;
                ulint   extra;

                size = rec_get_converted_size_comp(index,
                                                   REC_STATUS_ORDINARY,
                                                   entry, n_fields, &extra);

                ut_ad(data_size + extra_size + REC_N_NEW_EXTRA_BYTES == size);
                ut_ad(extra_size + REC_N_NEW_EXTRA_BYTES == extra);
        }
#endif /* UNIV_DEBUG */

        /* Add to the total size of the record in row_merge_block_t
        the encoded length of extra_size and the extra bytes (extra_size).
        See row_merge_buf_write() for the variable-length encoding
        of extra_size. */
        data_size += (extra_size + 1) + ((extra_size + 1) >= 0x80);

        /* The following assertion may fail if row_merge_block_t is
        declared very small and a PRIMARY KEY is being created with
        many prefix columns.  In that case, the record may exceed the
        page_zip_rec_needs_ext() limit.  However, no further columns
        will be moved to external storage until the record is inserted
        to the clustered index B-tree. */
        ut_ad(data_size < sizeof(row_merge_block_t));

        /* Reserve one byte for the end marker of row_merge_block_t. */
        if (buf->total_size + data_size >= sizeof(row_merge_block_t) - 1) {
                return(FALSE);
        }

        buf->total_size += data_size;
        buf->n_tuples++;

        field = entry;

        /* Copy the data fields. */

        do {
                dfield_dup(field++, buf->heap);
        } while (--n_fields);

        return(TRUE);
}
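
/* Size accounting example: a tuple whose fields convert to
extra_size = 3 and data_size = 100 consumes 100 + (3 + 1) = 104 bytes
of the block, because the encoded length (extra_size + 1 = 4) fits in
a single byte; a second length byte is only needed when
extra_size + 1 >= 0x80. */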
 

/* Structure for reporting duplicate records. */
struct row_merge_dup_struct {
        const dict_index_t*     index;          /* index being sorted */
        TABLE*                  table;          /* MySQL table object */
        ulint                   n_dup;          /* number of duplicates */
};

typedef struct row_merge_dup_struct row_merge_dup_t;

/*****************************************************************
Report a duplicate key. */
static
void
row_merge_dup_report(
/*=================*/
        row_merge_dup_t*        dup,    /* in/out: for reporting duplicates */
        const dfield_t*         entry)  /* in: duplicate index entry */
{
        mrec_buf_t              buf;
        const dtuple_t*         tuple;
        dtuple_t                tuple_store;
        const rec_t*            rec;
        const dict_index_t*     index   = dup->index;
        ulint                   n_fields= dict_index_get_n_fields(index);
        mem_heap_t*             heap    = NULL;
        ulint                   offsets_[REC_OFFS_NORMAL_SIZE];
        ulint*                  offsets;
        ulint                   n_ext;

        if (dup->n_dup++) {
                /* Only report the first duplicate record,
                but count all duplicate records. */
                return;
        }

        rec_offs_init(offsets_);

        /* Convert the tuple to a record and then to MySQL format. */

        tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
        n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;

        rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
        offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
                                  &heap);

        innobase_rec_to_mysql(dup->table, rec, index, offsets);

        if (UNIV_LIKELY_NULL(heap)) {
                mem_heap_free(heap);
        }
}

/*****************************************************************
Compare two tuples. */
static
int
row_merge_tuple_cmp(
/*================*/
                                        /* out: 1, 0, -1 if a is greater,
                                        equal, less, respectively, than b */
        ulint                   n_field,/* in: number of fields */
        const dfield_t*         a,      /* in: first tuple to be compared */
        const dfield_t*         b,      /* in: second tuple to be compared */
        row_merge_dup_t*        dup)    /* in/out: for reporting duplicates */
{
        int             cmp;
        const dfield_t* field   = a;

        do {
                cmp = cmp_dfield_dfield(a++, b++);
        } while (!cmp && --n_field);

        if (UNIV_UNLIKELY(!cmp) && UNIV_LIKELY_NULL(dup)) {
                row_merge_dup_report(dup, field);
        }

        return(cmp);
}

/**************************************************************************
Merge sort the tuple buffer in main memory. */
static
void
row_merge_tuple_sort(
/*=================*/
        ulint                   n_field,/* in: number of fields */
        row_merge_dup_t*        dup,    /* in/out: for reporting duplicates */
        const dfield_t**        tuples, /* in/out: tuples */
        const dfield_t**        aux,    /* in/out: work area */
        ulint                   low,    /* in: lower bound of the
                                        sorting area, inclusive */
        ulint                   high)   /* in: upper bound of the
                                        sorting area, exclusive */
{
#define row_merge_tuple_sort_ctx(a,b,c,d) \
        row_merge_tuple_sort(n_field, dup, a, b, c, d)
#define row_merge_tuple_cmp_ctx(a,b) row_merge_tuple_cmp(n_field, a, b, dup)

        UT_SORT_FUNCTION_BODY(row_merge_tuple_sort_ctx,
                              tuples, aux, low, high, row_merge_tuple_cmp_ctx);
}

/**********************************************************
Sort a buffer. */
static
void
row_merge_buf_sort(
/*===============*/
        row_merge_buf_t*        buf,    /* in/out: sort buffer */
        row_merge_dup_t*        dup)    /* in/out: for reporting duplicates */
{
        row_merge_tuple_sort(dict_index_get_n_unique(buf->index), dup,
                             buf->tuples, buf->tmp_tuples, 0, buf->n_tuples);
}
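
/* Note that the sort order is determined by the first
dict_index_get_n_unique() fields only; for a unique index these are
exactly the fields that must not contain duplicates, which is why a
duplicate can be reported from the comparison function during the
sort itself. */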
 

/**********************************************************
Write a buffer to a block. */
static
void
row_merge_buf_write(
/*================*/
        const row_merge_buf_t*  buf,    /* in: sorted buffer */
#ifdef UNIV_DEBUG
        const merge_file_t*     of,     /* in: output file */
#endif /* UNIV_DEBUG */
        row_merge_block_t*      block)  /* out: buffer for writing to file */
#ifndef UNIV_DEBUG
# define row_merge_buf_write(buf, of, block) row_merge_buf_write(buf, block)
#endif /* !UNIV_DEBUG */
{
        const dict_index_t*     index   = buf->index;
        ulint                   n_fields= dict_index_get_n_fields(index);
        byte*                   b       = &(*block)[0];

        ulint           i;

        for (i = 0; i < buf->n_tuples; i++) {
                ulint           size;
                ulint           extra_size;
                const dfield_t* entry           = buf->tuples[i];

                size = rec_get_converted_size_comp(index,
                                                   REC_STATUS_ORDINARY,
                                                   entry, n_fields,
                                                   &extra_size);
                ut_ad(size > extra_size);
                ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES);
                extra_size -= REC_N_NEW_EXTRA_BYTES;
                size -= REC_N_NEW_EXTRA_BYTES;

                /* Encode extra_size + 1 */
                if (extra_size + 1 < 0x80) {
                        *b++ = (byte) (extra_size + 1);
                } else {
                        ut_ad((extra_size + 1) < 0x8000);
                        *b++ = (byte) (0x80 | ((extra_size + 1) >> 8));
                        *b++ = (byte) (extra_size + 1);
                }

                ut_ad(b + size < block[1]);

                rec_convert_dtuple_to_rec_comp(b + extra_size, 0, index,
                                               REC_STATUS_ORDINARY,
                                               entry, n_fields);

                b += size;

#ifdef UNIV_DEBUG
                if (row_merge_print_write) {
                        fprintf(stderr, "row_merge_buf_write %p,%d,%lu %lu",
                                (void*) b, of->fd, (ulong) of->offset,
                                (ulong) i);
                        row_merge_tuple_print(stderr, entry, n_fields);
                }
#endif /* UNIV_DEBUG */
        }

        /* Write an "end-of-chunk" marker. */
        ut_a(b < block[1]);
        ut_a(b == block[0] + buf->total_size);
        *b++ = 0;
#ifdef UNIV_DEBUG_VALGRIND
        /* The rest of the block is uninitialized.  Initialize it
        to avoid bogus warnings. */
        memset(b, 0xff, block[1] - b);
#endif /* UNIV_DEBUG_VALGRIND */
#ifdef UNIV_DEBUG
        if (row_merge_print_write) {
                fprintf(stderr, "row_merge_buf_write %p,%d,%lu EOF\n",
                        (void*) b, of->fd, (ulong) of->offset);
        }
#endif /* UNIV_DEBUG */
}
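
/* Example of the extra_size + 1 encoding used above: the value 0x05
is written as the single byte 0x05, while the value 0x1234 is written
as the two bytes 0x92 0x34 (0x80 | the high bits, followed by the low
eight bits).  row_merge_read_rec() reverses this encoding. */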
 

/**********************************************************
Create a memory heap and allocate space for row_merge_rec_offsets(). */
static
mem_heap_t*
row_merge_heap_create(
/*==================*/
                                                /* out: memory heap */
        const dict_index_t*     index,          /* in: record descriptor */
        ulint**                 offsets1,       /* out: offsets */
        ulint**                 offsets2)       /* out: offsets */
{
        ulint           i       = 1 + REC_OFFS_HEADER_SIZE
                + dict_index_get_n_fields(index);
        mem_heap_t*     heap    = mem_heap_create(2 * i * sizeof *offsets1);

        *offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
        *offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);

        (*offsets1)[0] = (*offsets2)[0] = i;
        (*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);

        return(heap);
}
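
/* The first two elements of each offsets array hold its allocated
size and the number of fields, as assigned above; this appears to be
the layout that the rec_offs_* helpers and
rec_init_offsets_comp_ordinary() operate on. */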
 

/**************************************************************************
Search an index object by name and column names.  If several indexes match,
return the index with the max id. */
static
dict_index_t*
row_merge_dict_table_get_index(
/*===========================*/
                                                /* out: matching index,
                                                NULL if not found */
        dict_table_t*           table,          /* in: table */
        const merge_index_def_t*index_def)      /* in: index definition */
{
        ulint           i;
        dict_index_t*   index;
        const char**    column_names;

        column_names = mem_alloc(index_def->n_fields * sizeof *column_names);

        for (i = 0; i < index_def->n_fields; ++i) {
                column_names[i] = index_def->fields[i].field_name;
        }

        index = dict_table_get_index_by_max_id(
                table, index_def->name, column_names, index_def->n_fields);

        mem_free((void*) column_names);

        return(index);
}

/************************************************************************
Read a merge block from the file system. */
static
ibool
row_merge_read(
/*===========*/
                                        /* out: TRUE if request was
                                        successful, FALSE if fail */
        int                     fd,     /* in: file descriptor */
        ulint                   offset, /* in: offset where to read */
        row_merge_block_t*      buf)    /* out: data */
{
        ib_uint64_t     ofs = ((ib_uint64_t) offset) * sizeof *buf;
        ibool           success;

        success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf,
                                                 (ulint) (ofs & 0xFFFFFFFF),
                                                 (ulint) (ofs >> 32),
                                                 sizeof *buf);
        if (UNIV_UNLIKELY(!success)) {
                ut_print_timestamp(stderr);
                fprintf(stderr,
                        "  InnoDB: failed to read merge block at %llu\n", ofs);
        }

        return(UNIV_LIKELY(success));
}
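
/* The file offset is expressed in whole blocks.  For instance, with
the 1 MiB row_merge_block_t, offset 5 maps to byte position
5 * 1048576 = 0x500000, which is passed to the OS layer as the low
and high 32-bit halves of the 64-bit value, as above. */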
 

/************************************************************************
Write a merge block to the file system. */
static
ibool
row_merge_write(
/*============*/
                                /* out: TRUE if request was
                                successful, FALSE if fail */
        int             fd,     /* in: file descriptor */
        ulint           offset, /* in: offset where to write */
        const void*     buf)    /* in: data */
{
        ib_uint64_t     ofs = ((ib_uint64_t) offset)
                * sizeof(row_merge_block_t);

        return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
                                         (ulint) (ofs & 0xFFFFFFFF),
                                         (ulint) (ofs >> 32),
                                         sizeof(row_merge_block_t))));
}

/************************************************************************
Read a merge record. */
static
const byte*
row_merge_read_rec(
/*===============*/
                                        /* out: pointer to next record,
                                        or NULL on I/O error
                                        or end of list */
        row_merge_block_t*      block,  /* in/out: file buffer */
        mrec_buf_t*             buf,    /* in/out: secondary buffer */
        const byte*             b,      /* in: pointer to record */
        const dict_index_t*     index,  /* in: index of the record */
        int                     fd,     /* in: file descriptor */
        ulint*                  foffs,  /* in/out: file offset */
        const mrec_t**          mrec,   /* out: pointer to merge record,
                                        or NULL on end of list
                                        (non-NULL on I/O error) */
        ulint*                  offsets)/* out: offsets of mrec */
{
        ulint   extra_size;
        ulint   data_size;
        ulint   avail_size;

        ut_ad(block);
        ut_ad(buf);
        ut_ad(b >= block[0]);
        ut_ad(b < block[1]);
        ut_ad(index);
        ut_ad(foffs);
        ut_ad(mrec);
        ut_ad(offsets);

        ut_ad(*offsets == 1 + REC_OFFS_HEADER_SIZE
              + dict_index_get_n_fields(index));

        extra_size = *b++;

        if (UNIV_UNLIKELY(!extra_size)) {
                /* End of list */
                *mrec = NULL;
#ifdef UNIV_DEBUG
                if (row_merge_print_read) {
                        fprintf(stderr, "row_merge_read %p,%p,%d,%lu EOF\n",
                                (const void*) b, (const void*) block,
                                fd, (ulong) *foffs);
                }
#endif /* UNIV_DEBUG */
                return(NULL);
        }

        if (extra_size >= 0x80) {
                /* Read another byte of extra_size. */

                if (UNIV_UNLIKELY(b >= block[1])) {
                        if (!row_merge_read(fd, ++(*foffs), block)) {
err_exit:
                                /* Signal I/O error. */
                                *mrec = b;
                                return(NULL);
                        }

                        /* Wrap around to the beginning of the buffer. */
                        b = block[0];
                }

                extra_size = (extra_size & 0x7f) << 8;
                extra_size |= *b++;
        }

        /* Normalize extra_size.  Above, value 0 signals "end of list". */
        extra_size--;

        /* Read the extra bytes. */

        if (UNIV_UNLIKELY(b + extra_size >= block[1])) {
                /* The record spans two blocks.  Copy the entire record
                to the auxiliary buffer and handle this as a special
                case. */

                avail_size = block[1] - b;

                memcpy(*buf, b, avail_size);

                if (!row_merge_read(fd, ++(*foffs), block)) {

                        goto err_exit;
                }

                /* Wrap around to the beginning of the buffer. */
                b = block[0];

                /* Copy the record. */
                memcpy(*buf + avail_size, b, extra_size - avail_size);
                b += extra_size - avail_size;

                *mrec = *buf + extra_size;

                rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);

                data_size = rec_offs_data_size(offsets);

                /* These overflows should be impossible given that
                records are much smaller than either buffer, and
                the record starts near the beginning of each buffer. */
                ut_a(extra_size + data_size < sizeof *buf);
                ut_a(b + data_size < block[1]);

                /* Copy the data bytes. */
                memcpy(*buf + extra_size, b, data_size);
                b += data_size;

                goto func_exit;
        }

        *mrec = b + extra_size;

        rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);

        data_size = rec_offs_data_size(offsets);
        ut_ad(extra_size + data_size < sizeof *buf);

        b += extra_size + data_size;

        if (UNIV_LIKELY(b < block[1])) {
                /* The record fits entirely in the block.
                This is the normal case. */
                goto func_exit;
        }

        /* The record spans two blocks.  Copy it to buf. */

        b -= extra_size + data_size;
        avail_size = block[1] - b;
        memcpy(*buf, b, avail_size);
        *mrec = *buf + extra_size;
        rec_offs_make_valid(*mrec, index, offsets);

        if (!row_merge_read(fd, ++(*foffs), block)) {

                goto err_exit;
        }

        /* Wrap around to the beginning of the buffer. */
        b = block[0];

        /* Copy the rest of the record. */
        memcpy(*buf + avail_size, b, extra_size + data_size - avail_size);
        b += extra_size + data_size - avail_size;

func_exit:
#ifdef UNIV_DEBUG
        if (row_merge_print_read) {
                fprintf(stderr, "row_merge_read %p,%p,%d,%lu ",
                        (const void*) b, (const void*) block,
                        fd, (ulong) *foffs);
                rec_print_comp(stderr, *mrec, offsets);
                putc('\n', stderr);
        }
#endif /* UNIV_DEBUG */

        return(b);
}
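
/* To summarize row_merge_read_rec(): a leading 0 byte means end of
list; otherwise the record either lies entirely within the current
block, or it spans into the next block and is reassembled in the
mrec_buf_t, so that *mrec always points to a contiguous record. */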
 

/************************************************************************
Write a merge record. */
static
void
row_merge_write_rec_low(
/*====================*/
        byte*           b,      /* out: buffer */
        ulint           e,      /* in: encoded extra_size */
#ifdef UNIV_DEBUG
        ulint           size,   /* in: total size to write */
        int             fd,     /* in: file descriptor */
        ulint           foffs,  /* in: file offset */
#endif /* UNIV_DEBUG */
        const mrec_t*   mrec,   /* in: record to write */
        const ulint*    offsets)/* in: offsets of mrec */
#ifndef UNIV_DEBUG
# define row_merge_write_rec_low(b, e, size, fd, foffs, mrec, offsets)  \
        row_merge_write_rec_low(b, e, mrec, offsets)
#endif /* !UNIV_DEBUG */
{
#ifdef UNIV_DEBUG
        const byte* const end = b + size;
        ut_ad(e == rec_offs_extra_size(offsets) + 1);

        if (row_merge_print_write) {
                fprintf(stderr, "row_merge_write %p,%d,%lu ",
                        (void*) b, fd, (ulong) foffs);
                rec_print_comp(stderr, mrec, offsets);
                putc('\n', stderr);
        }
#endif /* UNIV_DEBUG */

        if (e < 0x80) {
                *b++ = (byte) e;
        } else {
                *b++ = (byte) (0x80 | (e >> 8));
                *b++ = (byte) e;
        }

        memcpy(b, mrec - rec_offs_extra_size(offsets), rec_offs_size(offsets));
        ut_ad(b + rec_offs_size(offsets) == end);
}

/************************************************************************
Write a merge record. */
static
byte*
row_merge_write_rec(
/*================*/
                                        /* out: pointer to end of block,
                                        or NULL on error */
        row_merge_block_t*      block,  /* in/out: file buffer */
        mrec_buf_t*             buf,    /* in/out: secondary buffer */
        byte*                   b,      /* in: pointer to end of block */
        int                     fd,     /* in: file descriptor */
        ulint*                  foffs,  /* in/out: file offset */
        const mrec_t*           mrec,   /* in: record to write */
        const ulint*            offsets)/* in: offsets of mrec */
{
        ulint   extra_size;
        ulint   size;
        ulint   avail_size;

        ut_ad(block);
        ut_ad(buf);
        ut_ad(b >= block[0]);
        ut_ad(b < block[1]);
        ut_ad(mrec);
        ut_ad(foffs);
        ut_ad(mrec < block[0] || mrec > block[1]);
        ut_ad(mrec < buf[0] || mrec > buf[1]);

        /* Normalize extra_size.  Value 0 signals "end of list". */
        extra_size = rec_offs_extra_size(offsets) + 1;

        size = extra_size + (extra_size >= 0x80)
                + rec_offs_data_size(offsets);

        if (UNIV_UNLIKELY(b + size >= block[1])) {
                /* The record spans two blocks.
                Copy it to the temporary buffer first. */
                avail_size = block[1] - b;

                row_merge_write_rec_low(buf[0],
                                        extra_size, size, fd, *foffs,
                                        mrec, offsets);

                /* Copy the head of the temporary buffer, write
                the completed block, and copy the tail of the
                record to the head of the new block. */
                memcpy(b, buf[0], avail_size);

                if (!row_merge_write(fd, (*foffs)++, block)) {
                        return(NULL);
                }

                UNIV_MEM_INVALID(block[0], sizeof block[0]);

                /* Copy the rest. */
                b = block[0];
                memcpy(b, buf[0] + avail_size, size - avail_size);
                b += size - avail_size;
        } else {
                row_merge_write_rec_low(b, extra_size, size, fd, *foffs,
                                        mrec, offsets);
                b += size;
        }

        return(b);
}

/************************************************************************
Write an end-of-list marker. */
static
byte*
row_merge_write_eof(
/*================*/
                                        /* out: pointer to end of block,
                                        or NULL on error */
        row_merge_block_t*      block,  /* in/out: file buffer */
        byte*                   b,      /* in: pointer to end of block */
        int                     fd,     /* in: file descriptor */
        ulint*                  foffs)  /* in/out: file offset */
{
        ut_ad(block);
        ut_ad(b >= block[0]);
        ut_ad(b < block[1]);
        ut_ad(foffs);
#ifdef UNIV_DEBUG
        if (row_merge_print_write) {
                fprintf(stderr, "row_merge_write %p,%p,%d,%lu EOF\n",
                        (void*) b, (void*) block, fd, (ulong) *foffs);
        }
#endif /* UNIV_DEBUG */

        *b++ = 0;
        UNIV_MEM_ASSERT_RW(block[0], b - block[0]);
        UNIV_MEM_ASSERT_W(block[0], sizeof block[0]);
#ifdef UNIV_DEBUG_VALGRIND
        /* The rest of the block is uninitialized.  Initialize it
        to avoid bogus warnings. */
        memset(b, 0xff, block[1] - b);
#endif /* UNIV_DEBUG_VALGRIND */

        if (!row_merge_write(fd, (*foffs)++, block)) {
                return(NULL);
        }

        UNIV_MEM_INVALID(block[0], sizeof block[0]);
        return(block[0]);
}

/*****************************************************************
Compare two merge records. */
static
int
row_merge_cmp(
/*==========*/
                                                /* out: 1, 0, -1 if
                                                mrec1 is greater, equal, less,
                                                respectively, than mrec2 */
        const mrec_t*           mrec1,          /* in: first merge
                                                record to be compared */
        const mrec_t*           mrec2,          /* in: second merge
                                                record to be compared */
        const ulint*            offsets1,       /* in: first record offsets */
        const ulint*            offsets2,       /* in: second record offsets */
        const dict_index_t*     index)          /* in: index */
{
        int     cmp;

        cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index);

#ifdef UNIV_DEBUG
        if (row_merge_print_cmp) {
                fputs("row_merge_cmp1 ", stderr);
                rec_print_comp(stderr, mrec1, offsets1);
                fputs("\nrow_merge_cmp2 ", stderr);
                rec_print_comp(stderr, mrec2, offsets2);
                fprintf(stderr, "\nrow_merge_cmp=%d\n", cmp);
        }
#endif /* UNIV_DEBUG */

        return(cmp);
}

/************************************************************************
Reads the clustered index of the table and creates temporary files
containing the index entries for the indexes to be built. */
static
ulint
row_merge_read_clustered_index(
/*===========================*/
                                        /* out: DB_SUCCESS or error */
        trx_t*                  trx,    /* in: transaction */
        TABLE*                  table,  /* in/out: MySQL table object,
                                        for reporting erroneous records */
        const dict_table_t*     old_table,/* in: table where rows are
                                        read from */
        const dict_table_t*     new_table,/* in: table where indexes are
                                        created; identical to old_table
                                        unless creating a PRIMARY KEY */
        dict_index_t**          index,  /* in: indexes to be created */
        merge_file_t*           files,  /* in: temporary files */
        ulint                   n_index,/* in: number of indexes to create */
        row_merge_block_t*      block)  /* in/out: file buffer */
{
        dict_index_t*           clust_index;    /* Clustered index */
        mem_heap_t*             row_heap;       /* Heap memory to create
                                                clustered index records */
        row_merge_buf_t**       merge_buf;      /* Temporary list for records*/
        btr_pcur_t              pcur;           /* Persistent cursor on the
                                                clustered index */
        mtr_t                   mtr;            /* Mini transaction */
        ulint                   err = DB_SUCCESS;/* Return code */
        ulint                   i;
        ulint                   n_nonnull = 0;  /* number of columns
                                                changed to NOT NULL */
        ulint*                  nonnull = NULL; /* NOT NULL columns */

        trx->op_info = "reading clustered index";

        ut_ad(trx);
        ut_ad(old_table);
        ut_ad(new_table);
        ut_ad(index);
        ut_ad(files);

        /* Create and initialize memory for record buffers */

        merge_buf = mem_alloc(n_index * sizeof *merge_buf);

        for (i = 0; i < n_index; i++) {
                merge_buf[i] = row_merge_buf_create(index[i]);
        }

        mtr_start(&mtr);

        /* Find the clustered index and create a persistent cursor
        based on that. */

        clust_index = dict_table_get_first_index(old_table);

        btr_pcur_open_at_index_side(
                TRUE, clust_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);

        if (UNIV_UNLIKELY(old_table != new_table)) {
                ulint   n_cols = dict_table_get_n_cols(old_table);

                /* A primary key will be created.  Identify the
                columns that were flagged NOT NULL in the new table,
                so that we can quickly check that the records in the
                (old) clustered index do not violate the added NOT
                NULL constraints. */

                ut_a(n_cols == dict_table_get_n_cols(new_table));

                nonnull = mem_alloc(n_cols * sizeof *nonnull);

                for (i = 0; i < n_cols; i++) {
                        if (dict_table_get_nth_col(old_table, i)->prtype
                            & DATA_NOT_NULL) {

                                continue;
                        }

                        if (dict_table_get_nth_col(new_table, i)->prtype
                            & DATA_NOT_NULL) {

                                nonnull[n_nonnull++] = i;
                        }
                }

                if (!n_nonnull) {
                        mem_free(nonnull);
                        nonnull = NULL;
                }
        }

        row_heap = mem_heap_create(sizeof(mrec_buf_t));

        /* Scan the clustered index. */
        for (;;) {
                const rec_t*    rec;
                ulint*          offsets;
                dtuple_t*       row             = NULL;
                row_ext_t*      ext;
                ibool           has_next        = TRUE;

                btr_pcur_move_to_next_on_page(&pcur);

                /* When switching pages, commit the mini-transaction
                in order to release the latch on the old page. */

                if (btr_pcur_is_after_last_on_page(&pcur)) {
                        btr_pcur_store_position(&pcur, &mtr);
                        mtr_commit(&mtr);
                        mtr_start(&mtr);
                        btr_pcur_restore_position(BTR_SEARCH_LEAF,
                                                  &pcur, &mtr);
                        has_next = btr_pcur_move_to_next_user_rec(&pcur, &mtr);
                }

                if (UNIV_LIKELY(has_next)) {
                        rec = btr_pcur_get_rec(&pcur);
                        offsets = rec_get_offsets(rec, clust_index, NULL,
                                                  ULINT_UNDEFINED, &row_heap);

                        /* Skip delete marked records. */
                        if (rec_get_deleted_flag(
                                    rec, dict_table_is_comp(old_table))) {
                                continue;
                        }

                        srv_n_rows_inserted++;

                        /* Build a row based on the clustered index. */

                        row = row_build(ROW_COPY_POINTERS, clust_index,
                                        rec, offsets,
                                        new_table, &ext, row_heap);

                        if (UNIV_LIKELY_NULL(nonnull)) {
                                for (i = 0; i < n_nonnull; i++) {
                                        dfield_t*       field
                                                = &row->fields[nonnull[i]];
                                        dtype_t*        field_type
                                                = dfield_get_type(field);

                                        ut_a(!(field_type->prtype
                                               & DATA_NOT_NULL));

                                        if (dfield_is_null(field)) {
                                                err = DB_PRIMARY_KEY_IS_NULL;
                                                i = 0;
                                                goto err_exit;
                                        }

                                        field_type->prtype |= DATA_NOT_NULL;
                                }
                        }
                }

                /* Build all entries for all the indexes to be created
                in a single scan of the clustered index. */

                for (i = 0; i < n_index; i++) {
                        row_merge_buf_t*        buf     = merge_buf[i];
                        merge_file_t*           file    = &files[i];
                        const dict_index_t*     index   = buf->index;

                        if (UNIV_LIKELY
                            (row && row_merge_buf_add(buf, row, ext))) {
                                continue;
                        }

                        /* The buffer must be sufficiently large
                        to hold at least one record. */
                        ut_ad(buf->n_tuples || !has_next);

                        /* We have enough data tuples to form a block.
                        Sort them and write to disk. */

                        if (buf->n_tuples) {
                                if (dict_index_is_unique(index)) {
                                        row_merge_dup_t dup;
                                        dup.index = buf->index;
                                        dup.table = table;
                                        dup.n_dup = 0;

                                        row_merge_buf_sort(buf, &dup);

                                        if (dup.n_dup) {
                                                err = DB_DUPLICATE_KEY;
err_exit:
                                                trx->error_key_num = i;
                                                goto func_exit;
                                        }
                                } else {
                                        row_merge_buf_sort(buf, NULL);
                                }
                        }

                        row_merge_buf_write(buf, file, block);

                        if (!row_merge_write(file->fd, file->offset++,
                                             block)) {
                                err = DB_OUT_OF_FILE_SPACE;
                                goto err_exit;
                        }

                        UNIV_MEM_INVALID(block[0], sizeof block[0]);
                        merge_buf[i] = row_merge_buf_empty(buf);

                        /* Try writing the record again, now that
                        the buffer has been written out and emptied. */

                        if (UNIV_UNLIKELY
                            (row && !row_merge_buf_add(buf, row, ext))) {
                                /* An empty buffer should have enough
                                room for at least one record. */
                                ut_error;
                        }
                }

                mem_heap_empty(row_heap);

                if (UNIV_UNLIKELY(!has_next)) {
                        goto func_exit;
                }
        }

func_exit:
        btr_pcur_close(&pcur);
        mtr_commit(&mtr);
        mem_heap_free(row_heap);

        if (UNIV_LIKELY_NULL(nonnull)) {
                mem_free(nonnull);
        }

        for (i = 0; i < n_index; i++) {
                row_merge_buf_free(merge_buf[i]);
        }

        mem_free(merge_buf);

        trx->op_info = "";

        return(err);
}
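
/* Thus a single pass over the clustered index produces, for every
index to be created, a series of sorted lists of index entries in
that index's temporary file, one block-sized list per filled sort
buffer.  These lists are combined afterwards by the merge passes,
starting with row_merge_blocks() below. */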
 
1269
 
 
1270
/*****************************************************************
 
1271
Merge two blocks of linked lists on disk and write a bigger block. */
 
1272
static
 
1273
ulint
 
1274
row_merge_blocks(
 
1275
/*=============*/
 
1276
                                        /* out: DB_SUCCESS or error code */
 
1277
        const dict_index_t*     index,  /* in: index being created */
 
1278
        merge_file_t*           file,   /* in/out: file containing
 
1279
                                        index entries */
 
1280
        row_merge_block_t*      block,  /* in/out: 3 buffers */
 
1281
        ulint*                  foffs0, /* in/out: offset of first
 
1282
                                        source list in the file */
 
1283
        ulint*                  foffs1, /* in/out: offset of second
 
1284
                                        source list in the file */
 
1285
        merge_file_t*           of,     /* in/out: output file */
 
1286
        TABLE*                  table)  /* in/out: MySQL table, for
 
1287
                                        reporting erroneous key value
 
1288
                                        if applicable */
 
1289
{
 
1290
        mem_heap_t*     heap;   /* memory heap for offsets0, offsets1 */
 
1291
 
 
1292
        mrec_buf_t      buf[3]; /* buffer for handling split mrec in block[] */
 
1293
        const byte*     b0;     /* pointer to block[0] */
 
1294
        const byte*     b1;     /* pointer to block[1] */
 
1295
        byte*           b2;     /* pointer to block[2] */
 
1296
        const mrec_t*   mrec0;  /* merge rec, points to block[0] or buf[0] */
 
1297
        const mrec_t*   mrec1;  /* merge rec, points to block[1] or buf[1] */
 
1298
        ulint*          offsets0;/* offsets of mrec0 */
 
1299
        ulint*          offsets1;/* offsets of mrec1 */
 
1300
 
 
1301
        heap = row_merge_heap_create(index, &offsets0, &offsets1);
 
1302
 
 
1303
        /* Write a record and read the next record.  Split the output
 
1304
        file in two halves, which can be merged on the following pass. */
 
1305
#define ROW_MERGE_WRITE_GET_NEXT(N, AT_END)                             \
 
1306
        do {                                                            \
 
1307
                b2 = row_merge_write_rec(&block[2], &buf[2], b2,        \
 
1308
                                         of->fd, &of->offset,           \
 
1309
                                         mrec##N, offsets##N);          \
 
1310
                if (UNIV_UNLIKELY(!b2)) {                               \
 
1311
                        goto corrupt;                                   \
 
1312
                }                                                       \
 
1313
                b##N = row_merge_read_rec(&block[N], &buf[N],           \
 
1314
                                          b##N, index,                  \
 
1315
                                          file->fd, foffs##N,           \
 
1316
                                          &mrec##N, offsets##N);        \
 
1317
                if (UNIV_UNLIKELY(!b##N)) {                             \
 
1318
                        if (mrec##N) {                                  \
 
1319
                                goto corrupt;                           \
 
1320
                        }                                               \
 
1321
                        AT_END;                                         \
 
1322
                }                                                       \
 
1323
        } while (0)
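        /* ROW_MERGE_WRITE_GET_NEXT(N, AT_END) writes the current record of
        input list N to the output (block[2], spilling into buf[2] for a
        record that spans a block boundary) and then reads the next record
        of list N.  A failed write, or a failed read that still returned a
        record pointer, is treated as corruption; when list N is exhausted,
        AT_END is executed. */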
 
1324
 
 
1325
        if (!row_merge_read(file->fd, *foffs0, &block[0])
 
1326
            || !row_merge_read(file->fd, *foffs1, &block[1])) {
 
1327
corrupt:
 
1328
                mem_heap_free(heap);
 
1329
                return(DB_CORRUPTION);
 
1330
        }
 
1331
 
 
1332
        b0 = block[0];
 
1333
        b1 = block[1];
 
1334
        b2 = block[2];
 
1335
 
 
1336
        b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
 
1337
                                foffs0, &mrec0, offsets0);
 
1338
        b1 = row_merge_read_rec(&block[1], &buf[1], b1, index, file->fd,
 
1339
                                foffs1, &mrec1, offsets1);
 
1340
        if (UNIV_UNLIKELY(!b0 && mrec0)
 
1341
            || UNIV_UNLIKELY(!b1 && mrec1)) {
 
1342
 
 
1343
                goto corrupt;
 
1344
        }
 
1345
 
 
1346
        while (mrec0 && mrec1) {
 
1347
                switch (row_merge_cmp(mrec0, mrec1,
 
1348
                                      offsets0, offsets1, index)) {
 
1349
                case 0:
 
1350
                        if (UNIV_UNLIKELY
 
1351
                            (dict_index_is_unique(index))) {
 
1352
                                innobase_rec_to_mysql(table, mrec0,
 
1353
                                                      index, offsets0);
 
1354
                                mem_heap_free(heap);
 
1355
                                return(DB_DUPLICATE_KEY);
 
1356
                        }
 
1357
                        /* fall through */
 
1358
                case -1:
 
1359
                        ROW_MERGE_WRITE_GET_NEXT(0, goto merged);
 
1360
                        break;
 
1361
                case 1:
 
1362
                        ROW_MERGE_WRITE_GET_NEXT(1, goto merged);
 
1363
                        break;
 
1364
                default:
 
1365
                        ut_error;
 
1366
                }
 
1367
 
 
1368
        }
 
1369
 
 
1370
merged:
 
1371
        if (mrec0) {
 
1372
                /* append all mrec0 to output */
 
1373
                for (;;) {
 
1374
                        ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
 
1375
                }
 
1376
        }
 
1377
done0:
 
1378
        if (mrec1) {
 
1379
                /* append all mrec1 to output */
 
1380
                for (;;) {
 
1381
                        ROW_MERGE_WRITE_GET_NEXT(1, goto done1);
 
1382
                }
 
1383
        }
 
1384
done1:
 
1385
 
 
1386
        mem_heap_free(heap);
 
1387
        b2 = row_merge_write_eof(&block[2], b2, of->fd, &of->offset);
 
1388
        return(b2 ? DB_SUCCESS : DB_CORRUPTION);
 
1389
}
 
1390
 
 
1391
/*****************************************************************
 
1392
Merge disk files: perform one merge pass over the file. */
 
1393
static
 
1394
ulint
 
1395
row_merge(
 
1396
/*======*/
 
1397
                                        /* out: DB_SUCCESS or error code */
 
1398
        const dict_index_t*     index,  /* in: index being created */
 
1399
        merge_file_t*           file,   /* in/out: file containing
 
1400
                                        index entries */
 
1401
        ulint                   half,   /* in: half the file */
 
1402
        row_merge_block_t*      block,  /* in/out: 3 buffers */
 
1403
        int*                    tmpfd,  /* in/out: temporary file handle */
 
1404
        TABLE*                  table)  /* in/out: MySQL table, for
 
1405
                                        reporting erroneous key value
 
1406
                                        if applicable */
 
1407
{
 
1408
        ulint           foffs0; /* first input offset */
 
1409
        ulint           foffs1; /* second input offset */
 
1410
        ulint           error;  /* error code */
 
1411
        merge_file_t    of;     /* output file */
 
1412
 
 
1413
        UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
 
1414
        ut_ad(half > 0);
 
1415
 
 
1416
        of.fd = *tmpfd;
 
1417
        of.offset = 0;
 
1418
 
 
1419
        /* Merge blocks to the output file. */
 
1420
        foffs0 = 0;
 
1421
        foffs1 = half;
 
1422
 
 
1423
        for (; foffs0 < half && foffs1 < file->offset; foffs0++, foffs1++) {
 
1424
                error = row_merge_blocks(index, file, block,
 
1425
                                         &foffs0, &foffs1, &of, table);
 
1426
 
 
1427
                if (error != DB_SUCCESS) {
 
1428
                        return(error);
 
1429
                }
 
1430
        }
 
1431
 
 
1432
        /* Copy the last blocks, if there are any. */
 
1433
        while (foffs0 < half) {
 
1434
                if (!row_merge_read(file->fd, foffs0++, block)
 
1435
                    || !row_merge_write(of.fd, of.offset++, block)) {
 
1436
                        return(DB_CORRUPTION);
 
1437
                }
 
1438
        }
 
1439
        while (foffs1 < file->offset) {
 
1440
                if (!row_merge_read(file->fd, foffs1++, block)
 
1441
                    || !row_merge_write(of.fd, of.offset++, block)) {
 
1442
                        return(DB_CORRUPTION);
 
1443
                }
 
1444
        }
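        /* The blocks copied above had no counterpart in the other half to
        merge with during this pass; they are carried to the output file
        unchanged and, if further passes are needed, are merged there. */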
 
1445
 
 
1446
        /* Swap file descriptors for the next pass. */
 
1447
        *tmpfd = file->fd;
 
1448
        *file = of;
 
1449
 
 
1450
        UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]);
 
1451
 
 
1452
        return(DB_SUCCESS);
 
1453
}
 
1454
 
 
1455
/*****************************************************************
 
1456
Merge disk files: repeat merge passes until the file is sorted. */
 
1457
static
 
1458
ulint
 
1459
row_merge_sort(
 
1460
/*===========*/
 
1461
                                        /* out: DB_SUCCESS or error code */
 
1462
        const dict_index_t*     index,  /* in: index being created */
 
1463
        merge_file_t*           file,   /* in/out: file containing
 
1464
                                        index entries */
 
1465
        row_merge_block_t*      block,  /* in/out: 3 buffers */
 
1466
        int*                    tmpfd,  /* in/out: temporary file handle */
 
1467
        TABLE*                  table)  /* in/out: MySQL table, for
 
1468
                                        reporting erroneous key value
 
1469
                                        if applicable */
 
1470
{
 
1471
        ulint   blksz;  /* block size */
 
1472
 
 
1473
        for (blksz = 1; blksz < file->offset; blksz *= 2) {
 
1474
                ulint   half;
 
1475
                ulint   error;
 
1476
 
 
1477
                ut_ad(ut_is_2pow(blksz));
 
1478
                half = ut_2pow_round((file->offset + (blksz - 1)) / 2, blksz);
 
1479
                error = row_merge(index, file, half, block, tmpfd, table);
 
1480
 
 
1481
                if (error != DB_SUCCESS) {
 
1482
                        return(error);
 
1483
                }
 
1484
        }
 
1485
 
 
1486
        return(DB_SUCCESS);
 
1487
}
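/* A minimal sketch (not InnoDB code): the doubling-pass structure of
row_merge_sort() shown on an in-memory array of integers.  The real code
above merges sorted lists stored in on-disk blocks via row_merge() and
row_merge_blocks(); here, runs of length "run" are simply merged pairwise
until one sorted run remains.  The names merge_runs() and sort_passes()
are made up for this illustration only; e.g. with int a[8] pre-filled and
int tmp[8], sort_passes(a, tmp, 8) leaves a[] sorted. */
#if 0
#include <stddef.h>
#include <string.h>

/* Merge the adjacent sorted runs a[lo..mid) and a[mid..hi) through tmp. */
static void
merge_runs(int* a, int* tmp, size_t lo, size_t mid, size_t hi)
{
	size_t	i = lo;
	size_t	j = mid;
	size_t	k = lo;

	while (i < mid && j < hi) {
		tmp[k++] = (a[j] < a[i]) ? a[j++] : a[i++];
	}
	while (i < mid) {
		tmp[k++] = a[i++];
	}
	while (j < hi) {
		tmp[k++] = a[j++];
	}

	memcpy(a + lo, tmp + lo, (hi - lo) * sizeof *a);
}

/* One doubling pass per iteration, like the blksz loop in row_merge_sort(). */
static void
sort_passes(int* a, int* tmp, size_t n)
{
	size_t	run;

	for (run = 1; run < n; run *= 2) {
		size_t	lo;

		for (lo = 0; lo + run < n; lo += 2 * run) {
			size_t	hi = lo + 2 * run;

			merge_runs(a, tmp, lo, lo + run, hi < n ? hi : n);
		}
	}
}
#endif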
 
1488
 
 
1489
/*****************************************************************
 
1490
Copy externally stored columns to the data tuple. */
 
1491
static
 
1492
void
 
1493
row_merge_copy_blobs(
 
1494
/*=================*/
 
1495
        const mrec_t*   mrec,   /* in: merge record */
 
1496
        const ulint*    offsets,/* in: offsets of mrec */
 
1497
        ulint           zip_size,/* in: compressed page size in bytes, or 0 */
 
1498
        dtuple_t*       tuple,  /* in/out: data tuple */
 
1499
        mem_heap_t*     heap)   /* in/out: memory heap */
 
1500
{
 
1501
        ulint   i;
 
1502
        ulint   n_fields = dtuple_get_n_fields(tuple);
 
1503
 
 
1504
        for (i = 0; i < n_fields; i++) {
 
1505
                ulint           len;
 
1506
                const void*     data;
 
1507
                dfield_t*       field = dtuple_get_nth_field(tuple, i);
 
1508
 
 
1509
                if (!dfield_is_ext(field)) {
 
1510
                        continue;
 
1511
                }
 
1512
 
 
1513
                ut_ad(!dfield_is_null(field));
 
1514
 
 
1515
                /* The table is locked during index creation.
 
1516
                Therefore, externally stored columns cannot possibly
 
1517
                be freed between the time the BLOB pointers are read
 
1518
                (row_merge_read_clustered_index()) and dereferenced
 
1519
                (below). */
 
1520
                data = btr_rec_copy_externally_stored_field(
 
1521
                        mrec, offsets, zip_size, i, &len, heap);
 
1522
 
 
1523
                dfield_set_data(field, data, len);
 
1524
        }
 
1525
}
 
1526
 
 
1527
/************************************************************************
 
1528
Read sorted file containing index data tuples and insert these data
 
1529
tuples into the index */
 
1530
static
 
1531
ulint
 
1532
row_merge_insert_index_tuples(
 
1533
/*==========================*/
 
1534
                                        /* out: DB_SUCCESS or error number */
 
1535
        trx_t*                  trx,    /* in: transaction */
 
1536
        dict_index_t*           index,  /* in: index */
 
1537
        dict_table_t*           table,  /* in: new table */
 
1538
        ulint                   zip_size,/* in: compressed page size of
 
1539
                                         the old table, or 0 if uncompressed */
 
1540
        int                     fd,     /* in: file descriptor */
 
1541
        row_merge_block_t*      block)  /* in/out: file buffer */
 
1542
{
 
1543
        mrec_buf_t              buf;
 
1544
        const byte*             b;
 
1545
        que_thr_t*              thr;
 
1546
        ins_node_t*             node;
 
1547
        mem_heap_t*             tuple_heap;
 
1548
        mem_heap_t*             graph_heap;
 
1549
        ulint                   error = DB_SUCCESS;
 
1550
        ulint                   foffs = 0;
 
1551
        ulint*                  offsets;
 
1552
 
 
1553
        ut_ad(trx);
 
1554
        ut_ad(index);
 
1555
        ut_ad(table);
 
1556
 
 
1557
        /* We use the insert query graph as the dummy graph
 
1558
        needed in the row module call */
 
1559
 
 
1560
        trx->op_info = "inserting index entries";
 
1561
 
 
1562
        graph_heap = mem_heap_create(500);
 
1563
        node = ins_node_create(INS_DIRECT, table, graph_heap);
 
1564
 
 
1565
        thr = pars_complete_graph_for_exec(node, trx, graph_heap);
 
1566
 
 
1567
        que_thr_move_to_run_state_for_mysql(thr, trx);
 
1568
 
 
1569
        tuple_heap = mem_heap_create(1000);
 
1570
 
 
1571
        {
 
1572
                ulint i = 1 + REC_OFFS_HEADER_SIZE
 
1573
                        + dict_index_get_n_fields(index);
 
1574
                offsets = mem_heap_alloc(graph_heap, i * sizeof *offsets);
 
1575
                offsets[0] = i;
 
1576
                offsets[1] = dict_index_get_n_fields(index);
 
1577
        }
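        /* The offsets[] array set up above follows the usual rec_offs
        convention: offsets[0] holds the allocated size of the array and
        offsets[1] the number of fields, so it can be passed directly to
        row_merge_read_rec() below. */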
 
1578
 
 
1579
        b = *block;
 
1580
 
 
1581
        if (!row_merge_read(fd, foffs, block)) {
 
1582
                error = DB_CORRUPTION;
 
1583
        } else {
 
1584
                for (;;) {
 
1585
                        const mrec_t*   mrec;
 
1586
                        dtuple_t*       dtuple;
 
1587
                        ulint           n_ext;
 
1588
 
 
1589
                        b = row_merge_read_rec(block, &buf, b, index,
 
1590
                                               fd, &foffs, &mrec, offsets);
 
1591
                        if (UNIV_UNLIKELY(!b)) {
 
1592
                                /* End of list, or I/O error */
 
1593
                                if (mrec) {
 
1594
                                        error = DB_CORRUPTION;
 
1595
                                }
 
1596
                                break;
 
1597
                        }
 
1598
 
 
1599
                        dtuple = row_rec_to_index_entry_low(
 
1600
                                mrec, index, offsets, &n_ext, tuple_heap);
 
1601
 
 
1602
                        if (UNIV_UNLIKELY(n_ext)) {
 
1603
                                row_merge_copy_blobs(mrec, offsets, zip_size,
 
1604
                                                     dtuple, tuple_heap);
 
1605
                        }
 
1606
 
 
1607
                        node->row = dtuple;
 
1608
                        node->table = table;
 
1609
                        node->trx_id = trx->id;
 
1610
 
 
1611
                        ut_ad(dtuple_validate(dtuple));
 
1612
 
 
1613
                        do {
 
1614
                                thr->run_node = thr;
 
1615
                                thr->prev_node = thr->common.parent;
 
1616
 
 
1617
                                error = row_ins_index_entry(index, dtuple,
 
1618
                                                            0, FALSE, thr);
 
1619
 
 
1620
                                if (UNIV_LIKELY(error == DB_SUCCESS)) {
 
1621
 
 
1622
                                        goto next_rec;
 
1623
                                }
 
1624
 
 
1625
                                thr->lock_state = QUE_THR_LOCK_ROW;
 
1626
                                trx->error_state = error;
 
1627
                                que_thr_stop_for_mysql(thr);
 
1628
                                thr->lock_state = QUE_THR_LOCK_NOLOCK;
 
1629
                        } while (row_mysql_handle_errors(&error, trx,
 
1630
                                                         thr, NULL));
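                        /* row_mysql_handle_errors() returns TRUE only when
                        the error was transient (such as a lock wait) and
                        the insert should be retried; any other error drops
                        out of the loop and is reported via err_exit. */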
 
1631
 
 
1632
                        goto err_exit;
 
1633
next_rec:
 
1634
                        mem_heap_empty(tuple_heap);
 
1635
                }
 
1636
        }
 
1637
 
 
1638
        que_thr_stop_for_mysql_no_error(thr, trx);
 
1639
err_exit:
 
1640
        que_graph_free(thr->graph);
 
1641
 
 
1642
        trx->op_info = "";
 
1643
 
 
1644
        mem_heap_free(tuple_heap);
 
1645
 
 
1646
        return(error);
 
1647
}
 
1648
 
 
1649
/*************************************************************************
 
1650
Sets an exclusive or shared lock on a table, for the duration of creating indexes. */
 
1651
UNIV_INTERN
 
1652
ulint
 
1653
row_merge_lock_table(
 
1654
/*=================*/
 
1655
                                        /* out: error code or DB_SUCCESS */
 
1656
        trx_t*          trx,            /* in/out: transaction */
 
1657
        dict_table_t*   table,          /* in: table to lock */
 
1658
        enum lock_mode  mode)           /* in: LOCK_X or LOCK_S */
 
1659
{
 
1660
        mem_heap_t*     heap;
 
1661
        que_thr_t*      thr;
 
1662
        ulint           err;
 
1663
        sel_node_t*     node;
 
1664
 
 
1665
        ut_ad(trx);
 
1666
        ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
 
1667
        ut_ad(mode == LOCK_X || mode == LOCK_S);
 
1668
 
 
1669
        heap = mem_heap_create(512);
 
1670
 
 
1671
        trx->op_info = "setting table lock for creating or dropping index";
 
1672
 
 
1673
        node = sel_node_create(heap);
 
1674
        thr = pars_complete_graph_for_exec(node, trx, heap);
 
1675
        thr->graph->state = QUE_FORK_ACTIVE;
 
1676
 
 
1677
        /* We use the select query graph as the dummy graph needed
 
1678
        in the lock module call */
 
1679
 
 
1680
        thr = que_fork_get_first_thr(que_node_get_parent(thr));
 
1681
        que_thr_move_to_run_state_for_mysql(thr, trx);
 
1682
 
 
1683
run_again:
 
1684
        thr->run_node = thr;
 
1685
        thr->prev_node = thr->common.parent;
 
1686
 
 
1687
        err = lock_table(0, table, mode, thr);
 
1688
 
 
1689
        trx->error_state = err;
 
1690
 
 
1691
        if (UNIV_LIKELY(err == DB_SUCCESS)) {
 
1692
                que_thr_stop_for_mysql_no_error(thr, trx);
 
1693
        } else {
 
1694
                que_thr_stop_for_mysql(thr);
 
1695
 
 
1696
                if (err != DB_QUE_THR_SUSPENDED) {
 
1697
                        ibool   was_lock_wait;
 
1698
 
 
1699
                        was_lock_wait = row_mysql_handle_errors(
 
1700
                                &err, trx, thr, NULL);
 
1701
 
 
1702
                        if (was_lock_wait) {
 
1703
                                goto run_again;
 
1704
                        }
 
1705
                } else {
 
1706
                        que_thr_t*      run_thr;
 
1707
                        que_node_t*     parent;
 
1708
 
 
1709
                        parent = que_node_get_parent(thr);
 
1710
                        run_thr = que_fork_start_command(parent);
 
1711
 
 
1712
                        ut_a(run_thr == thr);
 
1713
 
 
1714
                        /* There was a lock wait but the thread was not
 
1715
                        in a ready to run or running state. */
 
1716
                        trx->error_state = DB_LOCK_WAIT;
 
1717
 
 
1718
                        goto run_again;
 
1719
                }
 
1720
        }
 
1721
 
 
1722
        que_graph_free(thr->graph);
 
1723
        trx->op_info = "";
 
1724
 
 
1725
        return(err);
 
1726
}
 
1727
 
 
1728
/*************************************************************************
 
1729
Drop an index from the InnoDB system tables. */
 
1730
UNIV_INTERN
 
1731
void
 
1732
row_merge_drop_index(
 
1733
/*=================*/
 
1734
        dict_index_t*   index,  /* in: index to be removed */
 
1735
        dict_table_t*   table,  /* in: table */
 
1736
        trx_t*          trx)    /* in: transaction handle */
 
1737
{
 
1738
        ulint           err;
 
1739
        ibool           dict_lock = FALSE;
 
1740
        pars_info_t*    info = pars_info_create();
 
1741
 
 
1742
        /* We use the private SQL parser of Innobase to generate the
 
1743
        query graphs needed in deleting the dictionary data from system
 
1744
        tables in Innobase. Deleting a row from SYS_INDEXES table also
 
1745
        frees the file segments of the B-tree associated with the index. */
 
1746
 
 
1747
        static const char str1[] =
 
1748
                "PROCEDURE DROP_INDEX_PROC () IS\n"
 
1749
                "BEGIN\n"
 
1750
                "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
 
1751
                "DELETE FROM SYS_INDEXES WHERE ID = :indexid\n"
 
1752
                "               AND TABLE_ID = :tableid;\n"
 
1753
                "END;\n";
 
1754
 
 
1755
        ut_ad(index && table && trx);
 
1756
 
 
1757
        pars_info_add_dulint_literal(info, "indexid", index->id);
 
1758
        pars_info_add_dulint_literal(info, "tableid", table->id);
 
1759
 
 
1760
        trx_start_if_not_started(trx);
 
1761
        trx->op_info = "dropping index";
 
1762
 
 
1763
        if (trx->dict_operation_lock_mode == 0) {
 
1764
                row_mysql_lock_data_dictionary(trx);
 
1765
                dict_lock = TRUE;
 
1766
        }
 
1767
 
 
1768
        err = que_eval_sql(info, str1, FALSE, trx);
 
1769
 
 
1770
        ut_a(err == DB_SUCCESS);
 
1771
 
 
1772
        /* Replace this index with another equivalent index for all
 
1773
        foreign key constraints on this table where this index is used */
 
1774
 
 
1775
        dict_table_replace_index_in_foreign_list(table, index);
 
1776
        dict_index_remove_from_cache(table, index);
 
1777
 
 
1778
        if (dict_lock) {
 
1779
                row_mysql_unlock_data_dictionary(trx);
 
1780
        }
 
1781
 
 
1782
        trx->op_info = "";
 
1783
}
 
1784
 
 
1785
/*************************************************************************
 
1786
Drop those indexes which were created before an error occurred
 
1787
when building an index. */
 
1788
UNIV_INTERN
 
1789
void
 
1790
row_merge_drop_indexes(
 
1791
/*===================*/
 
1792
        trx_t*          trx,            /* in: transaction */
 
1793
        dict_table_t*   table,          /* in: table containing the indexes */
 
1794
        dict_index_t**  index,          /* in: indexes to drop */
 
1795
        ulint           num_created)    /* in: number of elements in index[] */
 
1796
{
 
1797
        ulint   key_num;
 
1798
 
 
1799
        for (key_num = 0; key_num < num_created; key_num++) {
 
1800
                row_merge_drop_index(index[key_num], table, trx);
 
1801
        }
 
1802
}
 
1803
 
 
1804
/*************************************************************************
 
1805
Drop all partially created indexes during crash recovery. */
 
1806
UNIV_INTERN
 
1807
void
 
1808
row_merge_drop_temp_indexes(void)
 
1809
/*=============================*/
 
1810
{
 
1811
        trx_t*          trx;
 
1812
        ulint           err;
 
1813
 
 
1814
        /* We use the private SQL parser of Innobase to generate the
 
1815
        query graphs needed in deleting the dictionary data from system
 
1816
        tables in Innobase. Deleting a row from SYS_INDEXES table also
 
1817
        frees the file segments of the B-tree associated with the index. */
 
1818
#if TEMP_INDEX_PREFIX != '\377'
 
1819
# error "TEMP_INDEX_PREFIX != '\377'"
 
1820
#endif
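        /* Index names that begin with the byte 0xFF (TEMP_INDEX_PREFIX)
        belong to indexes whose creation was never committed; the
        compile-time check above keeps the hard-coded '\377' in the SQL
        procedure below in sync with that constant. */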
 
1821
        static const char drop_temp_indexes[] =
 
1822
                "PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
 
1823
                "indexid CHAR;\n"
 
1824
                "DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
 
1825
                "WHERE SUBSTR(NAME,0,1)='\377' FOR UPDATE;\n"
 
1826
                "BEGIN\n"
 
1827
                "\tOPEN c;\n"
 
1828
                "\tWHILE 1 LOOP\n"
 
1829
                "\t\tFETCH c INTO indexid;\n"
 
1830
                "\t\tIF (SQL % NOTFOUND) THEN\n"
 
1831
                "\t\t\tEXIT;\n"
 
1832
                "\t\tEND IF;\n"
 
1833
                "\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
 
1834
                "\t\tDELETE FROM SYS_INDEXES WHERE CURRENT OF c;\n"
 
1835
                "\tEND LOOP;\n"
 
1836
                "\tCLOSE c;\n"
 
1837
                "\tCOMMIT WORK;\n"
 
1838
                "END;\n";
 
1839
 
 
1840
        trx = trx_allocate_for_background();
 
1841
        trx->op_info = "dropping partially created indexes";
 
1842
        row_mysql_lock_data_dictionary(trx);
 
1843
 
 
1844
        err = que_eval_sql(NULL, drop_temp_indexes, FALSE, trx);
 
1845
        ut_a(err == DB_SUCCESS);
 
1846
 
 
1847
        row_mysql_unlock_data_dictionary(trx);
 
1848
        trx_free_for_background(trx);
 
1849
}
 
1850
 
 
1851
/*************************************************************************
 
1852
Create a merge file. */
 
1853
static
 
1854
void
 
1855
row_merge_file_create(
 
1856
/*==================*/
 
1857
        merge_file_t*   merge_file)     /* out: merge file structure */
 
1858
{
 
1859
        merge_file->fd = innobase_mysql_tmpfile();
 
1860
        merge_file->offset = 0;
 
1861
}
 
1862
 
 
1863
/*************************************************************************
 
1864
Destroy a merge file. */
 
1865
static
 
1866
void
 
1867
row_merge_file_destroy(
 
1868
/*===================*/
 
1869
        merge_file_t*   merge_file)     /* in/out: merge file structure */
 
1870
{
 
1871
        if (merge_file->fd != -1) {
 
1872
                close(merge_file->fd);
 
1873
                merge_file->fd = -1;
 
1874
        }
 
1875
}
 
1876
 
 
1877
/*************************************************************************
 
1878
Determine the precise type of a column that is added to a temporary table,
 
1879
i.e. whether the column must be constrained NOT NULL. */
 
1880
UNIV_INLINE
 
1881
ulint
 
1882
row_merge_col_prtype(
 
1883
/*=================*/
 
1884
                                                /* out: col->prtype, possibly
 
1885
                                                ORed with DATA_NOT_NULL */
 
1886
        const dict_col_t*       col,            /* in: column */
 
1887
        const char*             col_name,       /* in: name of the column */
 
1888
        const merge_index_def_t*index_def)      /* in: the index definition
 
1889
                                                of the primary key */
 
1890
{
 
1891
        ulint   prtype = col->prtype;
 
1892
        ulint   i;
 
1893
 
 
1894
        ut_ad(index_def->ind_type & DICT_CLUSTERED);
 
1895
 
 
1896
        if (prtype & DATA_NOT_NULL) {
 
1897
 
 
1898
                return(prtype);
 
1899
        }
 
1900
 
 
1901
        /* All columns that are included
 
1902
        in the PRIMARY KEY must be NOT NULL. */
 
1903
 
 
1904
        for (i = 0; i < index_def->n_fields; i++) {
 
1905
                if (!strcmp(col_name, index_def->fields[i].field_name)) {
 
1906
                        return(prtype | DATA_NOT_NULL);
 
1907
                }
 
1908
        }
 
1909
 
 
1910
        return(prtype);
 
1911
}
 
1912
 
 
1913
/*************************************************************************
 
1914
Create a temporary table for creating a primary key, using the definition
 
1915
of an existing table. */
 
1916
UNIV_INTERN
 
1917
dict_table_t*
 
1918
row_merge_create_temporary_table(
 
1919
/*=============================*/
 
1920
                                                /* out: table,
 
1921
                                                or NULL on error */
 
1922
        const char*             table_name,     /* in: new table name */
 
1923
        const merge_index_def_t*index_def,      /* in: the index definition
 
1924
                                                of the primary key */
 
1925
        const dict_table_t*     table,          /* in: old table definition */
 
1926
        trx_t*                  trx)            /* in/out: transaction
 
1927
                                                (sets error_state) */
 
1928
{
 
1929
        ulint           i;
 
1930
        dict_table_t*   new_table = NULL;
 
1931
        ulint           n_cols = dict_table_get_n_user_cols(table);
 
1932
        ulint           error;
 
1933
        mem_heap_t*     heap = mem_heap_create(1000);
 
1934
 
 
1935
        ut_ad(table_name);
 
1936
        ut_ad(index_def);
 
1937
        ut_ad(table);
 
1938
        ut_ad(mutex_own(&dict_sys->mutex));
 
1939
 
 
1940
        new_table = dict_mem_table_create(table_name, 0, n_cols, table->flags);
 
1941
 
 
1942
        for (i = 0; i < n_cols; i++) {
 
1943
                const dict_col_t*       col;
 
1944
                const char*             col_name;
 
1945
 
 
1946
                col = dict_table_get_nth_col(table, i);
 
1947
                col_name = dict_table_get_col_name(table, i);
 
1948
 
 
1949
                dict_mem_table_add_col(new_table, heap, col_name, col->mtype,
 
1950
                                       row_merge_col_prtype(col, col_name,
 
1951
                                                            index_def),
 
1952
                                       col->len);
 
1953
        }
 
1954
 
 
1955
        error = row_create_table_for_mysql(new_table, trx);
 
1956
        mem_heap_free(heap);
 
1957
 
 
1958
        if (error != DB_SUCCESS) {
 
1959
                trx->error_state = error;
 
1960
                dict_mem_table_free(new_table);
 
1961
                new_table = NULL;
 
1962
        }
 
1963
 
 
1964
        return(new_table);
 
1965
}
 
1966
 
 
1967
/*************************************************************************
 
1968
Rename the temporary indexes in the dictionary to permanent ones. */
 
1969
UNIV_INTERN
 
1970
ulint
 
1971
row_merge_rename_indexes(
 
1972
/*=====================*/
 
1973
                                        /* out: DB_SUCCESS if all OK */
 
1974
        trx_t*          trx,            /* in/out: transaction */
 
1975
        dict_table_t*   table)          /* in/out: table with new indexes */
 
1976
{
 
1977
        ibool           dict_lock = FALSE;
 
1978
        ulint           err = DB_SUCCESS;
 
1979
        pars_info_t*    info = pars_info_create();
 
1980
 
 
1981
        /* We use the private SQL parser of Innobase to generate the
 
1982
        query graphs needed in renaming indexes. */
 
1983
 
 
1984
#if TEMP_INDEX_PREFIX != '\377'
 
1985
# error "TEMP_INDEX_PREFIX != '\377'"
 
1986
#endif
 
1987
 
 
1988
        static const char rename_indexes[] =
 
1989
                "PROCEDURE RENAME_INDEXES_PROC () IS\n"
 
1990
                "BEGIN\n"
 
1991
                "UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n"
 
1992
                "WHERE TABLE_ID = :tableid AND SUBSTR(NAME,0,1)='\377';\n"
 
1993
                "END;\n";
 
1994
 
 
1995
        ut_ad(table && trx);
 
1996
 
 
1997
        trx_start_if_not_started(trx);
 
1998
        trx->op_info = "renaming indexes";
 
1999
 
 
2000
        pars_info_add_dulint_literal(info, "tableid", table->id);
 
2001
 
 
2002
        if (trx->dict_operation_lock_mode == 0) {
 
2003
                row_mysql_lock_data_dictionary(trx);
 
2004
                dict_lock = TRUE;
 
2005
        }
 
2006
 
 
2007
        err = que_eval_sql(info, rename_indexes, FALSE, trx);
 
2008
 
 
2009
        if (err == DB_SUCCESS) {
 
2010
                dict_index_t*   index = dict_table_get_first_index(table);
 
2011
                do {
 
2012
                        if (*index->name == TEMP_INDEX_PREFIX) {
 
2013
                                index->name++;
 
2014
                        }
 
2015
                        index = dict_table_get_next_index(index);
 
2016
                } while (index);
 
2017
        }
 
2018
 
 
2019
        if (dict_lock) {
 
2020
                row_mysql_unlock_data_dictionary(trx);
 
2021
        }
 
2022
 
 
2023
        trx->op_info = "";
 
2024
 
 
2025
        return(err);
 
2026
}
 
2027
 
 
2028
/*************************************************************************
 
2029
Rename the tables in the data dictionary. */
 
2030
UNIV_INTERN
 
2031
ulint
 
2032
row_merge_rename_tables(
 
2033
/*====================*/
 
2034
                                        /* out: error code or DB_SUCCESS */
 
2035
        dict_table_t*   old_table,      /* in/out: old table, renamed to
 
2036
                                        tmp_name */
 
2037
        dict_table_t*   new_table,      /* in/out: new table, renamed to
 
2038
                                        old_table->name */
 
2039
        const char*     tmp_name,       /* in: new name for old_table */
 
2040
        trx_t*          trx)            /* in: transaction handle */
 
2041
{
 
2042
        ulint           err     = DB_ERROR;
 
2043
        pars_info_t*    info;
 
2044
        const char*     old_name = old_table->name;
 
2045
 
 
2046
        ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
 
2047
        ut_ad(old_table != new_table);
 
2048
        ut_ad(mutex_own(&dict_sys->mutex));
 
2049
 
 
2050
        trx->op_info = "renaming tables";
 
2051
        trx_start_if_not_started(trx);
 
2052
 
 
2053
        /* We use the private SQL parser of Innobase to generate the query
 
2054
        graphs needed in updating the dictionary data in system tables. */
 
2055
 
 
2056
        info = pars_info_create();
 
2057
 
 
2058
        pars_info_add_str_literal(info, "new_name", new_table->name);
 
2059
        pars_info_add_str_literal(info, "old_name", old_name);
 
2060
        pars_info_add_str_literal(info, "tmp_name", tmp_name);
 
2061
 
 
2062
        err = que_eval_sql(info,
 
2063
                           "PROCEDURE RENAME_TABLES () IS\n"
 
2064
                           "BEGIN\n"
 
2065
                           "UPDATE SYS_TABLES SET NAME = :tmp_name\n"
 
2066
                           " WHERE NAME = :old_name;\n"
 
2067
                           "UPDATE SYS_TABLES SET NAME = :old_name\n"
 
2068
                           " WHERE NAME = :new_name;\n"
 
2069
                           "END;\n", FALSE, trx);
 
2070
 
 
2071
        if (err != DB_SUCCESS) {
 
2072
 
 
2073
                goto err_exit;
 
2074
        }
 
2075
 
 
2076
        /* The following calls will also rename the .ibd data files if
 
2077
        the tables are stored in a single-table tablespace */
 
2078
 
 
2079
        if (!dict_table_rename_in_cache(old_table, tmp_name, FALSE)
 
2080
            || !dict_table_rename_in_cache(new_table, old_name, FALSE)) {
 
2081
 
 
2082
                err = DB_ERROR;
 
2083
                goto err_exit;
 
2084
        }
 
2085
 
 
2086
        err = dict_load_foreigns(old_name, TRUE);
 
2087
 
 
2088
        if (err != DB_SUCCESS) {
 
2089
err_exit:
 
2090
                trx->error_state = DB_SUCCESS;
 
2091
                trx_general_rollback_for_mysql(trx, FALSE, NULL);
 
2092
                trx->error_state = DB_SUCCESS;
 
2093
        }
 
2094
 
 
2095
        trx->op_info = "";
 
2096
 
 
2097
        return(err);
 
2098
}
 
2099
 
 
2100
/*************************************************************************
 
2101
Create and execute a query graph for creating an index. */
 
2102
static
 
2103
ulint
 
2104
row_merge_create_index_graph(
 
2105
/*=========================*/
 
2106
                                        /* out: DB_SUCCESS or error code */
 
2107
        trx_t*          trx,            /* in: trx */
 
2108
        dict_table_t*   table,          /* in: table */
 
2109
        dict_index_t*   index)          /* in: index */
 
2110
{
 
2111
        ind_node_t*     node;           /* Index creation node */
 
2112
        mem_heap_t*     heap;           /* Memory heap */
 
2113
        que_thr_t*      thr;            /* Query thread */
 
2114
        ulint           err;
 
2115
 
 
2116
        ut_ad(trx);
 
2117
        ut_ad(table);
 
2118
        ut_ad(index);
 
2119
 
 
2120
        heap = mem_heap_create(512);
 
2121
 
 
2122
        index->table = table;
 
2123
        node = ind_create_graph_create(index, heap);
 
2124
        thr = pars_complete_graph_for_exec(node, trx, heap);
 
2125
 
 
2126
        ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
 
2127
 
 
2128
        que_run_threads(thr);
 
2129
 
 
2130
        err = trx->error_state;
 
2131
 
 
2132
        que_graph_free((que_t*) que_node_get_parent(thr));
 
2133
 
 
2134
        return(err);
 
2135
}
 
2136
 
 
2137
/*************************************************************************
 
2138
Create the index and load it into the dictionary. */
 
2139
UNIV_INTERN
 
2140
dict_index_t*
 
2141
row_merge_create_index(
 
2142
/*===================*/
 
2143
                                        /* out: index, or NULL on error */
 
2144
        trx_t*          trx,            /* in/out: trx (sets error_state) */
 
2145
        dict_table_t*   table,          /* in: the index is on this table */
 
2146
        const merge_index_def_t*        /* in: the index definition */
 
2147
                        index_def)
 
2148
{
 
2149
        dict_index_t*   index;
 
2150
        ulint           err;
 
2151
        ulint           n_fields = index_def->n_fields;
 
2152
        ulint           i;
 
2153
 
 
2154
        /* Create the index prototype, using the passed-in definition; this is not
 
2155
        a persistent operation. We pass 0 as the space id, and determine at
 
2156
        a lower level the space id in which to store the table. */
 
2157
 
 
2158
        index = dict_mem_index_create(table->name, index_def->name,
 
2159
                                      0, index_def->ind_type, n_fields);
 
2160
 
 
2161
        ut_a(index);
 
2162
 
 
2163
        /* Create the index id, as it will be required when we build
 
2164
        the index. We assign the id here because we want to write an
 
2165
        UNDO record before we insert the entry into SYS_INDEXES. */
 
2166
        ut_a(ut_dulint_is_zero(index->id));
 
2167
 
 
2168
        index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID);
 
2169
        index->table = table;
 
2170
 
 
2171
        for (i = 0; i < n_fields; i++) {
 
2172
                merge_index_field_t*    ifield = &index_def->fields[i];
 
2173
 
 
2174
                dict_mem_index_add_field(index, ifield->field_name,
 
2175
                                         ifield->prefix_len);
 
2176
        }
 
2177
 
 
2178
        /* Add the index to SYS_INDEXES; this will use the prototype
 
2179
        to create an entry in SYS_INDEXES. */
 
2180
        err = row_merge_create_index_graph(trx, table, index);
 
2181
 
 
2182
        if (err == DB_SUCCESS) {
 
2183
 
 
2184
                index = row_merge_dict_table_get_index(
 
2185
                        table, index_def);
 
2186
 
 
2187
                ut_a(index);
 
2188
 
 
2189
#ifdef ROW_MERGE_IS_INDEX_USABLE
 
2190
                /* Note the id of the transaction that created this
 
2191
                index, we use it to restrict readers from accessing
 
2192
                this index, to ensure read consistency. */
 
2193
                index->trx_id = trx->id;
 
2194
#endif /* ROW_MERGE_IS_INDEX_USABLE */
 
2195
        } else {
 
2196
                index = NULL;
 
2197
        }
 
2198
 
 
2199
        return(index);
 
2200
}
 
2201
 
 
2202
#ifdef ROW_MERGE_IS_INDEX_USABLE
 
2203
/*************************************************************************
 
2204
Check if a transaction can use an index. */
 
2205
UNIV_INTERN
 
2206
ibool
 
2207
row_merge_is_index_usable(
 
2208
/*======================*/
 
2209
        const trx_t*            trx,    /* in: transaction */
 
2210
        const dict_index_t*     index)  /* in: index to check */
 
2211
{
 
2212
        if (!trx->read_view) {
 
2213
                return(TRUE);
 
2214
        }
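        /* With a read view, the index is usable only if the transaction
        that created it has an id below the view's low limit, i.e. the
        index was not created by a transaction that started after this
        read view was opened. */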
 
2215
 
 
2216
        return(ut_dulint_cmp(index->trx_id, trx->read_view->low_limit_id) < 0);
 
2217
}
 
2218
#endif /* ROW_MERGE_IS_INDEX_USABLE */
 
2219
 
 
2220
/*************************************************************************
 
2221
Drop the old table. */
 
2222
UNIV_INTERN
 
2223
ulint
 
2224
row_merge_drop_table(
 
2225
/*=================*/
 
2226
                                        /* out: DB_SUCCESS or error code */
 
2227
        trx_t*          trx,            /* in: transaction */
 
2228
        dict_table_t*   table)          /* in: table to drop */
 
2229
{
 
2230
        ulint           err = DB_SUCCESS;
 
2231
        ibool           dict_locked = FALSE;
 
2232
 
 
2233
        if (trx->dict_operation_lock_mode == 0) {
 
2234
                row_mysql_lock_data_dictionary(trx);
 
2235
                dict_locked = TRUE;
 
2236
        }
 
2237
 
 
2238
        /* There must be no open transactions on the table. */
 
2239
        ut_a(table->n_mysql_handles_opened == 0);
 
2240
 
 
2241
        err = row_drop_table_for_mysql_no_commit(table->name, trx, FALSE);
 
2242
 
 
2243
        if (dict_locked) {
 
2244
                row_mysql_unlock_data_dictionary(trx);
 
2245
        }
 
2246
 
 
2247
        return(err);
 
2248
}
 
2249
 
 
2250
/*************************************************************************
 
2251
Build indexes on a table by reading a clustered index,
 
2252
creating a temporary file containing index entries, merge sorting
 
2253
these index entries and inserting the sorted index entries into the indexes. */
 
2254
UNIV_INTERN
 
2255
ulint
 
2256
row_merge_build_indexes(
 
2257
/*====================*/
 
2258
                                        /* out: DB_SUCCESS or error code */
 
2259
        trx_t*          trx,            /* in: transaction */
 
2260
        dict_table_t*   old_table,      /* in: table where rows are
 
2261
                                        read from */
 
2262
        dict_table_t*   new_table,      /* in: table where indexes are
 
2263
                                        created; identical to old_table
 
2264
                                        unless creating a PRIMARY KEY */
 
2265
        dict_index_t**  indexes,        /* in: indexes to be created */
 
2266
        ulint           n_indexes,      /* in: size of indexes[] */
 
2267
        TABLE*          table)          /* in/out: MySQL table, for
 
2268
                                        reporting erroneous key value
 
2269
                                        if applicable */
 
2270
{
 
2271
        merge_file_t*           merge_files;
 
2272
        row_merge_block_t*      block;
 
2273
        ulint                   block_size;
 
2274
        ulint                   i;
 
2275
        ulint                   error;
 
2276
        int                     tmpfd;
 
2277
 
 
2278
        ut_ad(trx);
 
2279
        ut_ad(old_table);
 
2280
        ut_ad(new_table);
 
2281
        ut_ad(indexes);
 
2282
        ut_ad(n_indexes);
 
2283
 
 
2284
        trx_start_if_not_started(trx);
 
2285
 
 
2286
        /* Allocate memory for merge file data structure and initialize
 
2287
        fields */
 
2288
 
 
2289
        merge_files = mem_alloc(n_indexes * sizeof *merge_files);
 
2290
        block_size = 3 * sizeof *block;
 
2291
        block = os_mem_alloc_large(&block_size);
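        /* block now points to three row_merge_block_t buffers allocated as
        one large chunk.  The same three buffers are reused for reading the
        clustered index, for every merge pass and for the final insert
        phase. */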
 
2292
 
 
2293
        for (i = 0; i < n_indexes; i++) {
 
2294
 
 
2295
                row_merge_file_create(&merge_files[i]);
 
2296
        }
 
2297
 
 
2298
        tmpfd = innobase_mysql_tmpfile();
 
2299
 
 
2300
        /* Reset the MySQL row buffer that is used when reporting
 
2301
        duplicate keys. */
 
2302
        innobase_rec_reset(table);
 
2303
 
 
2304
        /* Read clustered index of the table and create files for
 
2305
        secondary index entries for merge sort */
 
2306
 
 
2307
        error = row_merge_read_clustered_index(
 
2308
                trx, table, old_table, new_table, indexes,
 
2309
                merge_files, n_indexes, block);
 
2310
 
 
2311
        if (error != DB_SUCCESS) {
 
2312
 
 
2313
                goto func_exit;
 
2314
        }
 
2315
 
 
2316
        /* Now we have files containing index entries ready for
 
2317
        sorting and inserting. */
 
2318
 
 
2319
        for (i = 0; i < n_indexes; i++) {
 
2320
                error = row_merge_sort(indexes[i], &merge_files[i],
 
2321
                                       block, &tmpfd, table);
 
2322
 
 
2323
                if (error == DB_SUCCESS) {
 
2324
                        error = row_merge_insert_index_tuples(
 
2325
                                trx, indexes[i], new_table,
 
2326
                                dict_table_zip_size(old_table),
 
2327
                                merge_files[i].fd, block);
 
2328
                }
 
2329
 
 
2330
                /* Close the temporary file to free up space. */
 
2331
                row_merge_file_destroy(&merge_files[i]);
 
2332
 
 
2333
                if (error != DB_SUCCESS) {
 
2334
                        trx->error_key_num = i;
 
2335
                        goto func_exit;
 
2336
                }
 
2337
        }
 
2338
 
 
2339
func_exit:
 
2340
        close(tmpfd);
 
2341
 
 
2342
        for (i = 0; i < n_indexes; i++) {
 
2343
                row_merge_file_destroy(&merge_files[i]);
 
2344
        }
 
2345
 
 
2346
        mem_free(merge_files);
 
2347
        os_mem_free_large(block, block_size);
 
2348
 
 
2349
        return(error);
 
2350
}