~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000-2006 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
17
/**
18
  @file
19
20
  @brief
21
  Functions for easy reading of records, possible through a cache
22
*/
23
24
#include "mysql_priv.h"
25
26
static int rr_quick(READ_RECORD *info);
27
int rr_sequential(READ_RECORD *info);
28
static int rr_from_tempfile(READ_RECORD *info);
29
static int rr_unpack_from_tempfile(READ_RECORD *info);
30
static int rr_unpack_from_buffer(READ_RECORD *info);
31
static int rr_from_pointers(READ_RECORD *info);
32
static int rr_from_cache(READ_RECORD *info);
33
static int init_rr_cache(THD *thd, READ_RECORD *info);
34
static int rr_cmp(uchar *a,uchar *b);
35
static int rr_index_first(READ_RECORD *info);
36
static int rr_index(READ_RECORD *info);
37
38
39
/**
40
  Initialize READ_RECORD structure to perform full index scan (in forward
41
  direction) using read_record.read_record() interface.
42
43
    This function has been added at late stage and is used only by
44
    UPDATE/DELETE. Other statements perform index scans using
45
    join_read_first/next functions.
46
47
  @param info         READ_RECORD structure to initialize.
48
  @param thd          Thread handle
49
  @param table        Table to be accessed
50
  @param print_error  If true, call table->file->print_error() if an error
51
                      occurs (except for end-of-records error)
52
  @param idx          index to scan
53
*/
54
55
void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
56
                          bool print_error, uint idx)
57
{
58
  empty_record(table);
59
  bzero((char*) info,sizeof(*info));
60
  info->table= table;
61
  info->file=  table->file;
62
  info->record= table->record[0];
63
  info->print_error= print_error;
64
65
  table->status=0;			/* And it's always found */
66
  if (!table->file->inited)
67
    table->file->ha_index_init(idx, 1);
68
  /* read_record will be changed to rr_index in rr_index_first */
69
  info->read_record= rr_index_first;
70
}
71
72
73
/*
74
  init_read_record is used to scan by using a number of different methods.
75
  Which method to use is set-up in this call so that later calls to
76
  the info->read_record will call the appropriate method using a function
77
  pointer.
78
79
  There are five methods that relate completely to the sort function
80
  filesort. The result of a filesort is retrieved using read_record
81
  calls. The other two methods are used for normal table access.
82
83
  The filesort will produce references to the records sorted, these
84
  references can be stored in memory or in a temporary file.
85
86
  The temporary file is normally used when the references don't fit into
87
  a properly sized memory buffer. For most small queries the references
88
  are stored in the memory buffer.
89
90
  The temporary file is also used when performing an update where a key is
91
  modified.
92
93
  Methods used when ref's are in memory (using rr_from_pointers):
94
    rr_unpack_from_buffer:
95
    ----------------------
96
      This method is used when table->sort.addon_field is allocated.
97
      This is allocated for most SELECT queries not involving any BLOB's.
98
      In this case the records are fetched from a memory buffer.
99
    rr_from_pointers:
100
    -----------------
101
      Used when the above is not true, UPDATE, DELETE and so forth and
102
      SELECT's involving BLOB's. It is also used when the addon_field
103
      buffer is not allocated due to that its size was bigger than the
104
      session variable max_length_for_sort_data.
105
      In this case the record data is fetched from the handler using the
106
      saved reference using the rnd_pos handler call.
107
108
  Methods used when ref's are in a temporary file (using rr_from_tempfile)
109
    rr_unpack_from_tempfile:
110
    ------------------------
111
      Same as rr_unpack_from_buffer except that references are fetched from
112
      temporary file. Should obviously not really happen other than in
113
      strange configurations.
114
115
    rr_from_tempfile:
116
    -----------------
117
      Same as rr_from_pointers except that references are fetched from
118
      temporary file instead of from a memory buffer.
119
    rr_from_cache:
120
    --------------
121
      This is a special variant of rr_from_tempfile that can be used for
122
      handlers that are not using the HA_FAST_KEY_READ table flag. Instead
123
      of reading the references one by one from the temporary file it reads
124
      a set of them, sorts them and reads all of them into a buffer which
125
      is then used for a number of subsequent calls to rr_from_cache.
126
      It is only used for SELECT queries and a number of other conditions
127
      on table size.
128
129
  All other accesses use either index access methods (rr_quick) or a full
130
  table scan (rr_sequential).
131
  rr_quick:
132
  ---------
133
    rr_quick uses one of the QUICK_SELECT classes in opt_range.cc to
134
    perform an index scan. There are loads of functionality hidden
135
    in these quick classes. It handles all index scans of various kinds.
136
  rr_sequential:
137
  --------------
138
    This is the most basic access method of a table using rnd_init,
139
    rnd_next and rnd_end. No indexes are used.
140
*/
141
void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
142
		      SQL_SELECT *select,
143
		      int use_record_cache, bool print_error)
144
{
145
  IO_CACHE *tempfile;
146
  DBUG_ENTER("init_read_record");
147
148
  bzero((char*) info,sizeof(*info));
149
  info->thd=thd;
150
  info->table=table;
151
  info->file= table->file;
152
  info->forms= &info->table;		/* Only one table */
153
  
154
  if (table->s->tmp_table == NON_TRANSACTIONAL_TMP_TABLE &&
155
      !table->sort.addon_field)
156
    VOID(table->file->extra(HA_EXTRA_MMAP));
157
  
158
  if (table->sort.addon_field)
159
  {
160
    info->rec_buf= table->sort.addon_buf;
161
    info->ref_length= table->sort.addon_length;
162
  }
163
  else
164
  {
165
    empty_record(table);
166
    info->record= table->record[0];
167
    info->ref_length= table->file->ref_length;
168
  }
169
  info->select=select;
170
  info->print_error=print_error;
171
  info->ignore_not_found_rows= 0;
172
  table->status=0;			/* And it's always found */
173
174
  if (select && my_b_inited(&select->file))
175
    tempfile= &select->file;
176
  else
177
    tempfile= table->sort.io_cache;
178
  if (tempfile && my_b_inited(tempfile)) // Test if ref-records was used
179
  {
180
    DBUG_PRINT("info",("using rr_from_tempfile"));
181
    info->read_record= (table->sort.addon_field ?
182
                        rr_unpack_from_tempfile : rr_from_tempfile);
183
    info->io_cache=tempfile;
184
    reinit_io_cache(info->io_cache,READ_CACHE,0L,0,0);
185
    info->ref_pos=table->file->ref;
186
    if (!table->file->inited)
187
      table->file->ha_rnd_init(0);
188
189
    /*
190
      table->sort.addon_field is checked because if we use addon fields,
191
      it doesn't make sense to use cache - we don't read from the table
192
      and table->sort.io_cache is read sequentially
193
    */
194
    if (!table->sort.addon_field &&
195
        ! (specialflag & SPECIAL_SAFE_MODE) &&
196
	thd->variables.read_rnd_buff_size &&
197
	!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
198
	(table->db_stat & HA_READ_ONLY ||
199
	 table->reginfo.lock_type <= TL_READ_NO_INSERT) &&
200
	(ulonglong) table->s->reclength* (table->file->stats.records+
201
                                          table->file->stats.deleted) >
202
	(ulonglong) MIN_FILE_LENGTH_TO_USE_ROW_CACHE &&
203
	info->io_cache->end_of_file/info->ref_length * table->s->reclength >
204
	(my_off_t) MIN_ROWS_TO_USE_TABLE_CACHE &&
205
	!table->s->blob_fields &&
206
        info->ref_length <= MAX_REFLENGTH)
207
    {
208
      if (! init_rr_cache(thd, info))
209
      {
210
	DBUG_PRINT("info",("using rr_from_cache"));
211
	info->read_record=rr_from_cache;
212
      }
213
    }
214
  }
215
  else if (select && select->quick)
216
  {
217
    DBUG_PRINT("info",("using rr_quick"));
218
    info->read_record=rr_quick;
219
  }
220
  else if (table->sort.record_pointers)
221
  {
222
    DBUG_PRINT("info",("using record_pointers"));
223
    table->file->ha_rnd_init(0);
224
    info->cache_pos=table->sort.record_pointers;
225
    info->cache_end=info->cache_pos+ 
226
                    table->sort.found_records*info->ref_length;
227
    info->read_record= (table->sort.addon_field ?
228
                        rr_unpack_from_buffer : rr_from_pointers);
229
  }
230
  else
231
  {
232
    DBUG_PRINT("info",("using rr_sequential"));
233
    info->read_record=rr_sequential;
234
    table->file->ha_rnd_init(1);
235
    /* We can use record cache if we don't update dynamic length tables */
236
    if (!table->no_cache &&
237
	(use_record_cache > 0 ||
238
	 (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY ||
239
	 !(table->s->db_options_in_use & HA_OPTION_PACK_RECORD) ||
240
	 (use_record_cache < 0 &&
241
	  !(table->file->ha_table_flags() & HA_NOT_DELETE_WITH_CACHE))))
242
      VOID(table->file->extra_opt(HA_EXTRA_CACHE,
243
				  thd->variables.read_buff_size));
244
  }
245
  /* 
246
    Do condition pushdown for UPDATE/DELETE.
247
    TODO: Remove this from here as it causes two condition pushdown calls 
248
    when we're running a SELECT and the condition cannot be pushed down.
249
  */
250
  if (thd->variables.engine_condition_pushdown && 
251
      select && select->cond && 
252
      (select->cond->used_tables() & table->map) &&
253
      !table->file->pushed_cond)
254
    table->file->cond_push(select->cond);
255
256
  DBUG_VOID_RETURN;
257
} /* init_read_record */
258
259
260
261
/**
  Release what a READ_RECORD holds once reading is finished.

  Frees the row cache if one was allocated.  If the structure is still
  bound to a table, also frees the filesort buffers, turns handler caching
  off and ends the handler scan, then clears info->table so that a second
  call skips that part.

  @param info  READ_RECORD to shut down
*/
void end_read_record(READ_RECORD *info)
{
  if (info->cache)
  {
    my_free_lock((char*) info->cache, MYF(0));
    info->cache= 0;
  }

  if (!info->table)
    return;

  filesort_free_buffers(info->table, 0);
  (void) info->file->extra(HA_EXTRA_NO_CACHE);
  if (info->read_record != rr_quick)    // otherwise quick_range does it
    (void) info->file->ha_index_or_rnd_end();
  info->table= 0;
}
277
278
/**
  Translate a handler error code into the value a reader returns.

  @param info   Scan info; info->print_error decides whether the error is
                reported through the handler
  @param error  Non-zero code returned by a handler read call

  @retval -1    error was HA_ERR_END_OF_FILE (nothing is printed)
  @retval 1     error was negative
  @retval error any other value is passed through unchanged
*/
static int rr_handle_error(READ_RECORD *info, int error)
{
  if (error == HA_ERR_END_OF_FILE)
    return -1;

  if (info->print_error)
    info->table->file->print_error(error, MYF(0));

  /* Fix negative BDB errno */
  return (error < 0) ? 1 : error;
}
291
292
293
/** Read a record from head-database. */
294
295
static int rr_quick(READ_RECORD *info)
296
{
297
  int tmp;
298
  while ((tmp= info->select->quick->get_next()))
299
  {
300
    if (info->thd->killed)
301
    {
302
      my_error(ER_SERVER_SHUTDOWN, MYF(0));
303
      return 1;
304
    }
305
    if (tmp != HA_ERR_RECORD_DELETED)
306
    {
307
      tmp= rr_handle_error(info, tmp);
308
      break;
309
    }
310
  }
311
  return tmp;
312
}
313
314
315
/**
316
  Reads first row in an index scan.
317
318
  @param info  	Scan info
319
320
  @retval
321
    0   Ok
322
  @retval
323
    -1   End of records
324
  @retval
325
    1   Error
326
*/
327
328
static int rr_index_first(READ_RECORD *info)
329
{
330
  int tmp= info->file->index_first(info->record);
331
  info->read_record= rr_index;
332
  if (tmp)
333
    tmp= rr_handle_error(info, tmp);
334
  return tmp;
335
}
336
337
338
/**
339
  Reads index sequentially after first row.
340
341
  Read the next index record (in forward direction) and translate return
342
  value.
343
344
  @param info  Scan info
345
346
  @retval
347
    0   Ok
348
  @retval
349
    -1   End of records
350
  @retval
351
    1   Error
352
*/
353
354
static int rr_index(READ_RECORD *info)
355
{
356
  int tmp= info->file->index_next(info->record);
357
  if (tmp)
358
    tmp= rr_handle_error(info, tmp);
359
  return tmp;
360
}
361
362
363
/**
  Read the next row of a full table scan with rnd_next().

  @param info  Scan info

  @retval 0    Ok
  @retval -1   End of records
  @retval 1    Thread was killed (reported via send_kill_message())
  @retval other  Handler error as translated by rr_handle_error()
*/
int rr_sequential(READ_RECORD *info)
{
  for (;;)
  {
    const int rc= info->file->rnd_next(info->record);
    if (!rc)
      return 0;

    if (info->thd->killed)
    {
      info->thd->send_kill_message();
      return 1;
    }
    /*
      rnd_next can return RECORD_DELETED for MyISAM when one thread is
      reading and another deleting without locks; such rows are skipped.
    */
    if (rc != HA_ERR_RECORD_DELETED)
      return rr_handle_error(info, rc);
  }
}
385
386
387
/**
  Read the next row reference from info->io_cache and fetch that row
  with rnd_pos().

  A reference whose row is reported as HA_ERR_RECORD_DELETED is skipped,
  and so is one reported as HA_ERR_KEY_NOT_FOUND when
  info->ignore_not_found_rows is set.

  @param info  Scan info

  @retval 0    Ok
  @retval -1   No more references could be read from the file
  @retval other  Handler error as translated by rr_handle_error()
*/
static int rr_from_tempfile(READ_RECORD *info)
{
  int rc;
  do
  {
    if (my_b_read(info->io_cache, info->ref_pos, info->ref_length))
      return -1;                                /* End of file */
    rc= info->file->rnd_pos(info->record, info->ref_pos);
    /* Going round again is extremely unlikely to happen */
  } while (rc == HA_ERR_RECORD_DELETED ||
           (rc == HA_ERR_KEY_NOT_FOUND && info->ignore_not_found_rows));

  return rc ? rr_handle_error(info, rc) : 0;
} /* rr_from_tempfile */
405
406
407
/**
408
  Read a result set record from a temporary file after sorting.
409
410
  The function first reads the next sorted record from the temporary file.
411
  into a buffer. If a success it calls a callback function that unpacks 
412
  the fields values use in the result set from this buffer into their
413
  positions in the regular record buffer.
414
415
  @param info          Reference to the context including record descriptors
416
417
  @retval
418
    0   Record successfully read.
419
  @retval
420
    -1   There is no record to be read anymore.
421
*/
422
423
static int rr_unpack_from_tempfile(READ_RECORD *info)
424
{
425
  if (my_b_read(info->io_cache, info->rec_buf, info->ref_length))
426
    return -1;
427
  TABLE *table= info->table;
428
  (*table->sort.unpack)(table->sort.addon_field, info->rec_buf);
429
430
  return 0;
431
}
432
433
/**
  Fetch the row for the next reference in the in-memory array delimited
  by info->cache_pos and info->cache_end.

  The position is advanced by info->ref_length before the row is read.
  A reference whose row is reported as HA_ERR_RECORD_DELETED is skipped,
  and so is one reported as HA_ERR_KEY_NOT_FOUND when
  info->ignore_not_found_rows is set.

  @param info  Scan info

  @retval 0    Ok
  @retval -1   All references have been consumed
  @retval other  Handler error as translated by rr_handle_error()
*/
static int rr_from_pointers(READ_RECORD *info)
{
  int rc;
  do
  {
    if (info->cache_pos == info->cache_end)
      return -1;                                /* End of file */

    uchar *ref= info->cache_pos;
    info->cache_pos+= info->ref_length;
    rc= info->file->rnd_pos(info->record, ref);
    /* Going round again is extremely unlikely to happen */
  } while (rc == HA_ERR_RECORD_DELETED ||
           (rc == HA_ERR_KEY_NOT_FOUND && info->ignore_not_found_rows));

  return rc ? rr_handle_error(info, rc) : 0;
}
457
458
/**
459
  Read a result set record from a buffer after sorting.
460
461
  The function first reads the next sorted record from the sort buffer.
462
  If a success it calls a callback function that unpacks 
463
  the fields values use in the result set from this buffer into their
464
  positions in the regular record buffer.
465
466
  @param info          Reference to the context including record descriptors
467
468
  @retval
469
    0   Record successfully read.
470
  @retval
471
    -1   There is no record to be read anymore.
472
*/
473
474
static int rr_unpack_from_buffer(READ_RECORD *info)
475
{
476
  if (info->cache_pos == info->cache_end)
477
    return -1;                      /* End of buffer */
478
  TABLE *table= info->table;
479
  (*table->sort.unpack)(table->sort.addon_field, info->cache_pos);
480
  info->cache_pos+= info->ref_length;
481
482
  return 0;
483
}
484
	/* cacheing of records from a database */
485
486
static int init_rr_cache(THD *thd, READ_RECORD *info)
487
{
488
  uint rec_cache_size;
489
  DBUG_ENTER("init_rr_cache");
490
491
  info->struct_length= 3+MAX_REFLENGTH;
492
  info->reclength= ALIGN_SIZE(info->table->s->reclength+1);
493
  if (info->reclength < info->struct_length)
494
    info->reclength= ALIGN_SIZE(info->struct_length);
495
496
  info->error_offset= info->table->s->reclength;
497
  info->cache_records= (thd->variables.read_rnd_buff_size /
498
                        (info->reclength+info->struct_length));
499
  rec_cache_size= info->cache_records*info->reclength;
500
  info->rec_cache_size= info->cache_records*info->ref_length;
501
502
  // We have to allocate one more byte to use uint3korr (see comments for it)
503
  if (info->cache_records <= 2 ||
504
      !(info->cache=(uchar*) my_malloc_lock(rec_cache_size+info->cache_records*
505
					   info->struct_length+1,
506
					   MYF(0))))
507
    DBUG_RETURN(1);
508
#ifdef HAVE_purify
509
  // Avoid warnings in qsort
510
  bzero(info->cache,rec_cache_size+info->cache_records* info->struct_length+1);
511
#endif
512
  DBUG_PRINT("info",("Allocated buffert for %d records",info->cache_records));
513
  info->read_positions=info->cache+rec_cache_size;
514
  info->cache_pos=info->cache_end=info->cache;
515
  DBUG_RETURN(0);
516
} /* init_rr_cache */
517
518
519
/**
  Return the next row from the row cache, refilling the cache from the
  reference file whenever it is empty.

  A refill reads up to info->rec_cache_size bytes of references, tags each
  with its original sequence number, sorts the tagged references with
  rr_cmp(), fetches the rows in that sorted order with rnd_pos(), and
  stores each row in the slot given by its sequence number.  A failed
  fetch is recorded in the slot: the flag byte at info->error_offset is
  set and the error code is stored at the start of the slot.

  @param info  Scan info set up by init_rr_cache()

  @retval 0    Row copied to info->record
  @retval -1   No more references in the file
  @retval other  Error code recorded for this row when it was fetched
*/
static int rr_from_cache(READ_RECORD *info)
{
  uint i;
  ulong length;
  my_off_t rest_of_file;
  int16 error;
  uchar *position,*ref_position,*record_pos;
  ulong record;

  /* Refill for as long as there is nothing left to hand out. */
  while (info->cache_pos == info->cache_end)
  {
    length= info->rec_cache_size;
    rest_of_file= info->io_cache->end_of_file - my_b_tell(info->io_cache);
    if ((my_off_t) length > rest_of_file)
      length= (ulong) rest_of_file;
    if (!length || my_b_read(info->io_cache,info->cache,length))
    {
      DBUG_PRINT("info",("Found end of file"));
      return -1;			/* End of file */
    }

    /* From here on, length counts references rather than bytes. */
    length/= info->ref_length;

    /* Build the sort entries: reference, then a 3-byte sequence number. */
    position= info->cache;
    ref_position= info->read_positions;
    for (i= 0 ; i < length ; i++)
    {
      memcpy(ref_position,position,(size_t) info->ref_length);
      ref_position+= MAX_REFLENGTH;
      int3store(ref_position,(long) i);
      ref_position+= 3;
      position+= info->ref_length;
    }
    my_qsort(info->read_positions, length, info->struct_length,
             (qsort_cmp) rr_cmp);

    /* Fetch in sorted order; store each row by its sequence number. */
    position= info->read_positions;
    for (i= 0 ; i < length ; i++)
    {
      memcpy(info->ref_pos,position,(size_t) info->ref_length);
      position+= MAX_REFLENGTH;
      record= uint3korr(position);
      position+= 3;
      record_pos= info->cache+record*info->reclength;
      if ((error=(int16) info->file->rnd_pos(record_pos,info->ref_pos)))
      {
        record_pos[info->error_offset]= 1;
        shortstore(record_pos,error);
        DBUG_PRINT("error",("Got error: %d:%d when reading row",
                            my_errno, error));
      }
      else
        record_pos[info->error_offset]= 0;
    }
    info->cache_pos= info->cache;
    info->cache_end= info->cache_pos+length*info->reclength;
  }

  /* Hand out the slot at cache_pos. */
  if (info->cache_pos[info->error_offset])
  {
    shortget(error,info->cache_pos);
    if (info->print_error)
      info->table->file->print_error(error,MYF(0));
  }
  else
  {
    error= 0;
    memcpy(info->record,info->cache_pos,
           (size_t) info->table->s->reclength);
  }
  info->cache_pos+= info->reclength;
  return ((int) error);
} /* rr_from_cache */
591
592
593
/**
  Compare two row references byte by byte; used as the my_qsort()
  comparison function in rr_from_cache().

  The first MAX_REFLENGTH bytes are compared when MAX_REFLENGTH is 4,
  otherwise the first 8 bytes.  A loop replaces the former unrolled
  chain, which compared byte 5 correctly but returned a[1] - b[5].

  @param a  First reference
  @param b  Second reference

  @return Difference of the first pair of bytes that differ (a minus b),
          or 0 if all compared bytes are equal.
*/
static int rr_cmp(uchar *a,uchar *b)
{
#if MAX_REFLENGTH == 4
  enum { RR_CMP_BYTES= 4 };
#else
  enum { RR_CMP_BYTES= 8 };
#endif
  unsigned int i;

  for (i= 0 ; i < RR_CMP_BYTES ; i++)
  {
    if (a[i] != b[i])
      return (int) a[i] - (int) b[i];
  }
  return 0;
}