~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000-2006 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
17
/* Copy data from a textfile to table */
18
/* 2006-12 Erik Wetterberg : LOAD XML added */
19
20
#include "mysql_priv.h"
21
#include "sql_repl.h"
202.3.6 by Monty Taylor
First pass at gettexizing the error messages.
22
#include <drizzled/drizzled_error_messages.h>
1 by brian
clean slate
23
24
class XML_TAG {
25
public:
26
  int level;
27
  String field;
28
  String value;
29
  XML_TAG(int l, String f, String v);
30
};
31
32
33
XML_TAG::XML_TAG(int l, String f, String v)
34
{
35
  level= l;
36
  field.append(f);
37
  value.append(v);
38
}
39
40
41
class READ_INFO {
42
  File	file;
43
  uchar	*buffer,			/* Buffer for read text */
44
	*end_of_buff;			/* Data in bufferts ends here */
45
  uint	buff_length,			/* Length of buffert */
46
	max_length;			/* Max length of row */
47
  char	*field_term_ptr,*line_term_ptr,*line_start_ptr,*line_start_end;
48
  uint	field_term_length,line_term_length,enclosed_length;
49
  int	field_term_char,line_term_char,enclosed_char,escape_char;
50
  int	*stack,*stack_pos;
51
  bool	found_end_of_line,start_of_line,eof;
52
  bool  need_end_io_cache;
53
  IO_CACHE cache;
54
  NET *io_net;
55
  int level; /* for load xml */
56
57
public:
58
  bool error,line_cuted,found_null,enclosed;
59
  uchar	*row_start,			/* Found row starts here */
60
	*row_end;			/* Found row ends here */
61
  CHARSET_INFO *read_charset;
62
63
  READ_INFO(File file,uint tot_length,CHARSET_INFO *cs,
64
	    String &field_term,String &line_start,String &line_term,
65
	    String &enclosed,int escape,bool get_it_from_net, bool is_fifo);
66
  ~READ_INFO();
67
  int read_field();
68
  int read_fixed_length(void);
69
  int next_line(void);
70
  char unescape(char chr);
71
  int terminator(char *ptr,uint length);
72
  bool find_start_of_fields();
73
  /* load xml */
74
  List<XML_TAG> taglist;
75
  int read_value(int delim, String *val);
76
  int read_xml();
77
  int clear_level(int level);
78
79
  /*
80
    We need to force cache close before destructor is invoked to log
81
    the last read block
82
  */
83
  void end_io_cache()
84
  {
85
    ::end_io_cache(&cache);
86
    need_end_io_cache = 0;
87
  }
88
89
  /*
90
    Either this method, or we need to make cache public
91
    Arg must be set from mysql_load() since constructor does not see
92
    either the table or THD value
93
  */
94
  void set_io_cache_arg(void* arg) { cache.arg = arg; }
95
};
96
97
static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
98
                             List<Item> &fields_vars, List<Item> &set_fields,
99
                             List<Item> &set_values, READ_INFO &read_info,
100
			     ulong skip_lines,
101
			     bool ignore_check_option_errors);
102
static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
103
                          List<Item> &fields_vars, List<Item> &set_fields,
104
                          List<Item> &set_values, READ_INFO &read_info,
105
			  String &enclosed, ulong skip_lines,
106
			  bool ignore_check_option_errors);
107
108
static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
109
                          List<Item> &fields_vars, List<Item> &set_fields,
110
                          List<Item> &set_values, READ_INFO &read_info,
111
                          String &enclosed, ulong skip_lines,
112
                          bool ignore_check_option_errors);
113
114
static bool write_execute_load_query_log_event(THD *thd,
115
					       bool duplicates, bool ignore,
116
					       bool transactional_table,
117
                                               THD::killed_state killed_status);
118
119
/*
120
  Execute LOAD DATA query
121
122
  SYNOPSYS
123
    mysql_load()
124
      thd - current thread
125
      ex  - sql_exchange object representing source file and its parsing rules
126
      table_list  - list of tables to which we are loading data
127
      fields_vars - list of fields and variables to which we read
128
                    data from file
129
      set_fields  - list of fields mentioned in set clause
130
      set_values  - expressions to assign to fields in previous list
131
      handle_duplicates - indicates whenever we should emit error or
132
                          replace row if we will meet duplicates.
133
      ignore -          - indicates whenever we should ignore duplicates
134
      read_file_from_client - is this LOAD DATA LOCAL ?
135
136
  RETURN VALUES
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
137
    true - error / false - success
1 by brian
clean slate
138
*/
139
140
int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
141
	        List<Item> &fields_vars, List<Item> &set_fields,
142
                List<Item> &set_values,
143
                enum enum_duplicates handle_duplicates, bool ignore,
144
                bool read_file_from_client)
145
{
146
  char name[FN_REFLEN];
147
  File file;
148
  TABLE *table= NULL;
149
  int error;
150
  String *field_term=ex->field_term,*escaped=ex->escaped;
151
  String *enclosed=ex->enclosed;
152
  bool is_fifo=0;
153
  LOAD_FILE_INFO lf_info;
154
  char *db = table_list->db;			// This is never null
155
  /*
156
    If path for file is not defined, we will use the current database.
157
    If this is not set, we will use the directory where the table to be
158
    loaded is located
159
  */
160
  char *tdb= thd->db ? thd->db : db;		// Result is never null
161
  ulong skip_lines= ex->skip_lines;
162
  bool transactional_table;
163
  THD::killed_state killed_status= THD::NOT_KILLED;
164
165
  if (escaped->length() > 1 || enclosed->length() > 1)
166
  {
167
    my_message(ER_WRONG_FIELD_TERMINATORS,ER(ER_WRONG_FIELD_TERMINATORS),
168
	       MYF(0));
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
169
    return(true);
1 by brian
clean slate
170
  }
171
  if (open_and_lock_tables(thd, table_list))
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
172
    return(true);
1 by brian
clean slate
173
  if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
174
                                    &thd->lex->select_lex.top_join_list,
175
                                    table_list,
176
                                    &thd->lex->select_lex.leaf_tables, true))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
177
     return(-1);
1 by brian
clean slate
178
179
  /*
180
    Let us emit an error if we are loading data to table which is used
181
    in subselect in SET clause like we do it for INSERT.
182
183
    The main thing to fix to remove this restriction is to ensure that the
184
    table is marked to be 'used for insert' in which case we should never
185
    mark this table as 'const table' (ie, one that has only one row).
186
  */
187
  if (unique_table(thd, table_list, table_list->next_global, 0))
188
  {
189
    my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->table_name);
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
190
    return(true);
1 by brian
clean slate
191
  }
192
193
  table= table_list->table;
194
  transactional_table= table->file->has_transactions();
195
196
  if (!fields_vars.elements)
197
  {
198
    Field **field;
199
    for (field=table->field; *field ; field++)
200
      fields_vars.push_back(new Item_field(*field));
201
    bitmap_set_all(table->write_set);
202
    table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
203
    /*
204
      Let us also prepare SET clause, altough it is probably empty
205
      in this case.
206
    */
207
    if (setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
208
        setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0))
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
209
      return(true);
1 by brian
clean slate
210
  }
211
  else
212
  {						// Part field list
213
    /* TODO: use this conds for 'WITH CHECK OPTIONS' */
214
    if (setup_fields(thd, 0, fields_vars, MARK_COLUMNS_WRITE, 0, 0) ||
215
        setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
216
        check_that_all_fields_are_given_values(thd, table, table_list))
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
217
      return(true);
1 by brian
clean slate
218
    /*
219
      Check whenever TIMESTAMP field with auto-set feature specified
220
      explicitly.
221
    */
222
    if (table->timestamp_field)
223
    {
224
      if (bitmap_is_set(table->write_set,
225
                        table->timestamp_field->field_index))
226
        table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
227
      else
228
      {
229
        bitmap_set_bit(table->write_set,
230
                       table->timestamp_field->field_index);
231
      }
232
    }
233
    /* Fix the expressions in SET clause */
234
    if (setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0))
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
235
      return(true);
1 by brian
clean slate
236
  }
237
238
  table->mark_columns_needed_for_insert();
239
240
  uint tot_length=0;
241
  bool use_blobs= 0, use_vars= 0;
242
  List_iterator_fast<Item> it(fields_vars);
243
  Item *item;
244
245
  while ((item= it++))
246
  {
247
    Item *real_item= item->real_item();
248
249
    if (real_item->type() == Item::FIELD_ITEM)
250
    {
251
      Field *field= ((Item_field*)real_item)->field;
252
      if (field->flags & BLOB_FLAG)
253
      {
254
        use_blobs= 1;
255
        tot_length+= 256;			// Will be extended if needed
256
      }
257
      else
258
        tot_length+= field->field_length;
259
    }
260
    else if (item->type() == Item::STRING_ITEM)
261
      use_vars= 1;
262
  }
263
  if (use_blobs && !ex->line_term->length() && !field_term->length())
264
  {
265
    my_message(ER_BLOBS_AND_NO_TERMINATED,ER(ER_BLOBS_AND_NO_TERMINATED),
266
	       MYF(0));
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
267
    return(true);
1 by brian
clean slate
268
  }
269
  if (use_vars && !field_term->length() && !enclosed->length())
270
  {
271
    my_error(ER_LOAD_FROM_FIXED_SIZE_ROWS_TO_VAR, MYF(0));
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
272
    return(true);
1 by brian
clean slate
273
  }
274
275
  /* We can't give an error in the middle when using LOCAL files */
276
  if (read_file_from_client && handle_duplicates == DUP_ERROR)
277
    ignore= 1;
278
279
  if (read_file_from_client)
280
  {
281
    (void)net_request_file(&thd->net,ex->file_name);
282
    file = -1;
283
  }
284
  else
285
  {
286
#ifdef DONT_ALLOW_FULL_LOAD_DATA_PATHS
287
    ex->file_name+=dirname_length(ex->file_name);
288
#endif
289
    if (!dirname_length(ex->file_name))
290
    {
291
      strxnmov(name, FN_REFLEN-1, mysql_real_data_home, tdb, NullS);
292
      (void) fn_format(name, ex->file_name, name, "",
293
		       MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
294
    }
295
    else
296
    {
297
      (void) fn_format(name, ex->file_name, mysql_real_data_home, "",
298
		       MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
299
300
      if (opt_secure_file_priv &&
301
          strncmp(opt_secure_file_priv, name, strlen(opt_secure_file_priv)))
302
      {
303
        /* Read only allowed from within dir specified by secure_file_priv */
304
        my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--secure-file-priv");
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
305
        return(true);
1 by brian
clean slate
306
      }
307
15 by brian
Fix for stat, NETWARE removal
308
      struct stat stat_info;
309
      if (stat(name,&stat_info))
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
310
	return(true);
1 by brian
clean slate
311
312
      // if we are not in slave thread, the file must be:
313
      if (!thd->slave_thread &&
314
	  !((stat_info.st_mode & S_IROTH) == S_IROTH &&  // readable by others
315
	    (stat_info.st_mode & S_IFLNK) != S_IFLNK && // and not a symlink
316
	    ((stat_info.st_mode & S_IFREG) == S_IFREG ||
317
	     (stat_info.st_mode & S_IFIFO) == S_IFIFO)))
318
      {
319
	my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), name);
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
320
	return(true);
1 by brian
clean slate
321
      }
322
      if ((stat_info.st_mode & S_IFIFO) == S_IFIFO)
323
	is_fifo = 1;
324
    }
325
    if ((file=my_open(name,O_RDONLY,MYF(MY_WME))) < 0)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
326
      return(true);
1 by brian
clean slate
327
  }
328
329
  COPY_INFO info;
212.6.1 by Mats Kindahl
Replacing all bzero() calls with memset() calls and removing the bzero.c file.
330
  memset((char*) &info, 0, sizeof(info));
1 by brian
clean slate
331
  info.ignore= ignore;
332
  info.handle_duplicates=handle_duplicates;
333
  info.escape_char=escaped->length() ? (*escaped)[0] : INT_MAX;
334
335
  READ_INFO read_info(file,tot_length,
336
                      ex->cs ? ex->cs : thd->variables.collation_database,
337
		      *field_term,*ex->line_start, *ex->line_term, *enclosed,
338
		      info.escape_char, read_file_from_client, is_fifo);
339
  if (read_info.error)
340
  {
341
    if	(file >= 0)
342
      my_close(file,MYF(0));			// no files in net reading
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
343
    return(true);				// Can't allocate buffers
1 by brian
clean slate
344
  }
345
346
  if (mysql_bin_log.is_open())
347
  {
348
    lf_info.thd = thd;
349
    lf_info.wrote_create_file = 0;
350
    lf_info.last_pos_in_file = HA_POS_ERROR;
351
    lf_info.log_delayed= transactional_table;
352
    read_info.set_io_cache_arg((void*) &lf_info);
353
  }
354
355
  thd->count_cuted_fields= CHECK_FIELD_WARN;		/* calc cuted fields */
356
  thd->cuted_fields=0L;
357
  /* Skip lines if there is a line terminator */
358
  if (ex->line_term->length() && ex->filetype != FILETYPE_XML)
359
  {
360
    /* ex->skip_lines needs to be preserved for logging */
361
    while (skip_lines > 0)
362
    {
363
      skip_lines--;
364
      if (read_info.next_line())
365
	break;
366
    }
367
  }
368
369
  if (!(error=test(read_info.error)))
370
  {
371
372
    table->next_number_field=table->found_next_number_field;
373
    if (ignore ||
374
	handle_duplicates == DUP_REPLACE)
375
      table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
376
    if (handle_duplicates == DUP_REPLACE)
377
        table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE);
378
    table->file->ha_start_bulk_insert((ha_rows) 0);
379
    table->copy_blobs=1;
380
381
    thd->abort_on_warning= (!ignore &&
382
                            (thd->variables.sql_mode &
383
                             (MODE_STRICT_TRANS_TABLES |
384
                              MODE_STRICT_ALL_TABLES)));
385
386
    if (ex->filetype == FILETYPE_XML) /* load xml */
387
      error= read_xml_field(thd, info, table_list, fields_vars,
388
                            set_fields, set_values, read_info,
389
                            *(ex->line_term), skip_lines, ignore);
390
    else if (!field_term->length() && !enclosed->length())
391
      error= read_fixed_length(thd, info, table_list, fields_vars,
392
                               set_fields, set_values, read_info,
393
			       skip_lines, ignore);
394
    else
395
      error= read_sep_field(thd, info, table_list, fields_vars,
396
                            set_fields, set_values, read_info,
397
			    *enclosed, skip_lines, ignore);
398
    if (table->file->ha_end_bulk_insert() && !error)
399
    {
400
      table->file->print_error(my_errno, MYF(0));
401
      error= 1;
402
    }
403
    table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
404
    table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
405
    table->next_number_field=0;
406
  }
407
  if (file >= 0)
408
    my_close(file,MYF(0));
409
  free_blobs(table);				/* if pack_blob was used */
410
  table->copy_blobs=0;
411
  thd->count_cuted_fields= CHECK_FIELD_IGNORE;
412
  /* 
413
     simulated killing in the middle of per-row loop
414
     must be effective for binlogging
415
  */
416
  killed_status= (error == 0)? THD::NOT_KILLED : thd->killed;
417
  if (error)
418
  {
419
    if (read_file_from_client)
420
      while (!read_info.next_line())
421
	;
422
423
    if (mysql_bin_log.is_open())
424
    {
425
      {
426
	/*
427
	  Make sure last block (the one which caused the error) gets
428
	  logged.  This is needed because otherwise after write of (to
429
	  the binlog, not to read_info (which is a cache))
430
	  Delete_file_log_event the bad block will remain in read_info
431
	  (because pre_read is not called at the end of the last
432
	  block; remember pre_read is called whenever a new block is
433
	  read from disk).  At the end of mysql_load(), the destructor
434
	  of read_info will call end_io_cache() which will flush
435
	  read_info, so we will finally have this in the binlog:
436
437
	  Append_block # The last successfull block
438
	  Delete_file
439
	  Append_block # The failing block
440
	  which is nonsense.
441
	  Or could also be (for a small file)
442
	  Create_file  # The failing block
443
	  which is nonsense (Delete_file is not written in this case, because:
444
	  Create_file has not been written, so Delete_file is not written, then
445
	  when read_info is destroyed end_io_cache() is called which writes
446
	  Create_file.
447
	*/
448
	read_info.end_io_cache();
449
	/* If the file was not empty, wrote_create_file is true */
450
	if (lf_info.wrote_create_file)
451
	{
452
	  if (thd->transaction.stmt.modified_non_trans_table)
453
	    write_execute_load_query_log_event(thd, handle_duplicates,
454
					       ignore, transactional_table,
455
                                               killed_status);
456
	  else
457
	  {
458
	    Delete_file_log_event d(thd, db, transactional_table);
459
            d.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
460
	    mysql_bin_log.write(&d);
461
	  }
462
	}
463
      }
464
    }
465
    error= -1;				// Error on read
466
    goto err;
467
  }
468
  sprintf(name, ER(ER_LOAD_INFO), (ulong) info.records, (ulong) info.deleted,
469
	  (ulong) (info.records - info.copied), (ulong) thd->cuted_fields);
470
471
  if (thd->transaction.stmt.modified_non_trans_table)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
472
    thd->transaction.all.modified_non_trans_table= true;
1 by brian
clean slate
473
474
  if (mysql_bin_log.is_open())
475
  {
476
    /*
477
      We need to do the job that is normally done inside
478
      binlog_query() here, which is to ensure that the pending event
479
      is written before tables are unlocked and before any other
480
      events are written.  We also need to update the table map
481
      version for the binary log to mark that table maps are invalid
482
      after this point.
483
     */
484
    if (thd->current_stmt_binlog_row_based)
485
      thd->binlog_flush_pending_rows_event(true);
486
    else
487
    {
488
      /*
489
        As already explained above, we need to call end_io_cache() or the last
490
        block will be logged only after Execute_load_query_log_event (which is
491
        wrong), when read_info is destroyed.
492
      */
493
      read_info.end_io_cache();
494
      if (lf_info.wrote_create_file)
495
      {
496
        write_execute_load_query_log_event(thd, handle_duplicates, ignore,
497
                                           transactional_table,killed_status);
498
      }
499
    }
500
  }
501
502
  /* ok to client sent only after binlog write and engine commit */
503
  my_ok(thd, info.copied + info.deleted, 0L, name);
504
err:
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
505
  assert(transactional_table || !(info.copied || info.deleted) ||
1 by brian
clean slate
506
              thd->transaction.stmt.modified_non_trans_table);
507
  table->file->ha_release_auto_increment();
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
508
  table->auto_increment_field_not_null= false;
1 by brian
clean slate
509
  thd->abort_on_warning= 0;
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
510
  return(error);
1 by brian
clean slate
511
}
512
513
514
/* Not a very useful function; just to avoid duplication of code */
515
static bool write_execute_load_query_log_event(THD *thd,
516
					       bool duplicates, bool ignore,
517
					       bool transactional_table,
518
                                               THD::killed_state killed_err_arg)
519
{
520
  Execute_load_query_log_event
521
    e(thd, thd->query, thd->query_length,
522
      (char*)thd->lex->fname_start - (char*)thd->query,
523
      (char*)thd->lex->fname_end - (char*)thd->query,
524
      (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE :
525
      (ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR),
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
526
      transactional_table, false, killed_err_arg);
1 by brian
clean slate
527
  e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
528
  return mysql_bin_log.write(&e);
529
}
530
531
532
/****************************************************************************
533
** Read of rows of fixed size + optional garage + optonal newline
534
****************************************************************************/
535
536
static int
537
read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
538
                  List<Item> &fields_vars, List<Item> &set_fields,
539
                  List<Item> &set_values, READ_INFO &read_info,
540
                  ulong skip_lines, bool ignore_check_option_errors)
541
{
542
  List_iterator_fast<Item> it(fields_vars);
543
  Item_field *sql_field;
544
  TABLE *table= table_list->table;
151 by Brian Aker
Ulonglong to uint64_t
545
  uint64_t id;
1 by brian
clean slate
546
  bool err;
547
548
  id= 0;
549
 
550
  while (!read_info.read_fixed_length())
551
  {
552
    if (thd->killed)
553
    {
554
      thd->send_kill_message();
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
555
      return(1);
1 by brian
clean slate
556
    }
557
    if (skip_lines)
558
    {
559
      /*
560
	We could implement this with a simple seek if:
561
	- We are not using DATA INFILE LOCAL
562
	- escape character is  ""
563
	- line starting prefix is ""
564
      */
565
      skip_lines--;
566
      continue;
567
    }
568
    it.rewind();
569
    uchar *pos=read_info.row_start;
570
#ifdef HAVE_purify
571
    read_info.row_end[0]=0;
572
#endif
573
574
    restore_record(table, s->default_values);
575
    /*
576
      There is no variables in fields_vars list in this format so
577
      this conversion is safe.
578
    */
579
    while ((sql_field= (Item_field*) it++))
580
    {
581
      Field *field= sql_field->field;                  
582
      if (field == table->next_number_field)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
583
        table->auto_increment_field_not_null= true;
1 by brian
clean slate
584
      /*
585
        No fields specified in fields_vars list can be null in this format.
586
        Mark field as not null, we should do this for each row because of
587
        restore_record...
588
      */
589
      field->set_notnull();
590
591
      if (pos == read_info.row_end)
592
      {
593
        thd->cuted_fields++;			/* Not enough fields */
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
594
        push_warning_printf(thd, DRIZZLE_ERROR::WARN_LEVEL_WARN, 
1 by brian
clean slate
595
                            ER_WARN_TOO_FEW_RECORDS, 
596
                            ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
212.2.2 by Patrick Galbraith
Renamed FIELD_TYPE to DRIZZLE_TYPE
597
        if (!field->maybe_null() && field->type() == DRIZZLE_TYPE_TIMESTAMP)
1 by brian
clean slate
598
            ((Field_timestamp*) field)->set_time();
599
      }
600
      else
601
      {
602
	uint length;
603
	uchar save_chr;
604
	if ((length=(uint) (read_info.row_end-pos)) >
605
	    field->field_length)
606
	  length=field->field_length;
607
	save_chr=pos[length]; pos[length]='\0'; // Safeguard aganst malloc
608
        field->store((char*) pos,length,read_info.read_charset);
609
	pos[length]=save_chr;
610
	if ((pos+=length) > read_info.row_end)
611
	  pos= read_info.row_end;	/* Fills rest with space */
612
      }
613
    }
614
    if (pos != read_info.row_end)
615
    {
616
      thd->cuted_fields++;			/* To long row */
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
617
      push_warning_printf(thd, DRIZZLE_ERROR::WARN_LEVEL_WARN, 
1 by brian
clean slate
618
                          ER_WARN_TOO_MANY_RECORDS, 
619
                          ER(ER_WARN_TOO_MANY_RECORDS), thd->row_count); 
620
    }
621
622
    if (thd->killed ||
623
        fill_record(thd, set_fields, set_values,
624
                    ignore_check_option_errors))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
625
      return(1);
1 by brian
clean slate
626
627
    err= write_record(thd, table, &info);
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
628
    table->auto_increment_field_not_null= false;
1 by brian
clean slate
629
    if (err)
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
630
      return(1);
1 by brian
clean slate
631
   
632
    /*
633
      We don't need to reset auto-increment field since we are restoring
634
      its default value at the beginning of each loop iteration.
635
    */
636
    if (read_info.next_line())			// Skip to next line
637
      break;
638
    if (read_info.line_cuted)
639
    {
640
      thd->cuted_fields++;			/* To long row */
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
641
      push_warning_printf(thd, DRIZZLE_ERROR::WARN_LEVEL_WARN, 
1 by brian
clean slate
642
                          ER_WARN_TOO_MANY_RECORDS, 
643
                          ER(ER_WARN_TOO_MANY_RECORDS), thd->row_count); 
644
    }
645
    thd->row_count++;
646
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
647
  return(test(read_info.error));
1 by brian
clean slate
648
}
649
650
651
652
static int
653
read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
654
               List<Item> &fields_vars, List<Item> &set_fields,
655
               List<Item> &set_values, READ_INFO &read_info,
656
	       String &enclosed, ulong skip_lines,
657
	       bool ignore_check_option_errors)
658
{
659
  List_iterator_fast<Item> it(fields_vars);
660
  Item *item;
661
  TABLE *table= table_list->table;
662
  uint enclosed_length;
151 by Brian Aker
Ulonglong to uint64_t
663
  uint64_t id;
1 by brian
clean slate
664
  bool err;
665
666
  enclosed_length=enclosed.length();
667
  id= 0;
668
669
  for (;;it.rewind())
670
  {
671
    if (thd->killed)
672
    {
673
      thd->send_kill_message();
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
674
      return(1);
1 by brian
clean slate
675
    }
676
677
    restore_record(table, s->default_values);
678
679
    while ((item= it++))
680
    {
681
      uint length;
682
      uchar *pos;
683
      Item *real_item;
684
685
      if (read_info.read_field())
686
	break;
687
688
      /* If this line is to be skipped we don't want to fill field or var */
689
      if (skip_lines)
690
        continue;
691
692
      pos=read_info.row_start;
693
      length=(uint) (read_info.row_end-pos);
694
695
      real_item= item->real_item();
696
697
      if ((!read_info.enclosed && (enclosed_length && length == 4 && !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
698
	  (length == 1 && read_info.found_null))
699
      {
700
701
        if (real_item->type() == Item::FIELD_ITEM)
702
        {
703
          Field *field= ((Item_field *)real_item)->field;
704
          if (field->reset())
705
          {
706
            my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0), field->field_name,
707
                     thd->row_count);
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
708
            return(1);
1 by brian
clean slate
709
          }
710
          field->set_null();
711
          if (!field->maybe_null())
712
          {
212.2.2 by Patrick Galbraith
Renamed FIELD_TYPE to DRIZZLE_TYPE
713
            if (field->type() == DRIZZLE_TYPE_TIMESTAMP)
1 by brian
clean slate
714
              ((Field_timestamp*) field)->set_time();
715
            else if (field != table->next_number_field)
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
716
              field->set_warning(DRIZZLE_ERROR::WARN_LEVEL_WARN,
1 by brian
clean slate
717
                                 ER_WARN_NULL_TO_NOTNULL, 1);
718
          }
719
	}
720
        else if (item->type() == Item::STRING_ITEM)
721
        {
722
          ((Item_user_var_as_out_param *)item)->set_null_value(
723
                                                  read_info.read_charset);
724
        }
725
        else
726
        {
727
          my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
728
          return(1);
1 by brian
clean slate
729
        }
730
731
	continue;
732
      }
733
734
      if (real_item->type() == Item::FIELD_ITEM)
735
      {
736
        Field *field= ((Item_field *)real_item)->field;
737
        field->set_notnull();
738
        read_info.row_end[0]=0;			// Safe to change end marker
739
        if (field == table->next_number_field)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
740
          table->auto_increment_field_not_null= true;
1 by brian
clean slate
741
        field->store((char*) pos, length, read_info.read_charset);
742
      }
743
      else if (item->type() == Item::STRING_ITEM)
744
      {
745
        ((Item_user_var_as_out_param *)item)->set_value((char*) pos, length,
746
                                                        read_info.read_charset);
747
      }
748
      else
749
      {
750
        my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
751
        return(1);
1 by brian
clean slate
752
      }
753
    }
754
    if (read_info.error)
755
      break;
756
    if (skip_lines)
757
    {
758
      skip_lines--;
759
      continue;
760
    }
761
    if (item)
762
    {
763
      /* Have not read any field, thus input file is simply ended */
764
      if (item == fields_vars.head())
765
	break;
766
      for (; item ; item= it++)
767
      {
768
        Item *real_item= item->real_item();
769
        if (real_item->type() == Item::FIELD_ITEM)
770
        {
771
          Field *field= ((Item_field *)real_item)->field;
772
          if (field->reset())
773
          {
774
            my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0),field->field_name,
775
                     thd->row_count);
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
776
            return(1);
1 by brian
clean slate
777
          }
212.2.2 by Patrick Galbraith
Renamed FIELD_TYPE to DRIZZLE_TYPE
778
          if (!field->maybe_null() && field->type() == DRIZZLE_TYPE_TIMESTAMP)
1 by brian
clean slate
779
              ((Field_timestamp*) field)->set_time();
780
          /*
781
            QQ: We probably should not throw warning for each field.
782
            But how about intention to always have the same number
783
            of warnings in THD::cuted_fields (and get rid of cuted_fields
784
            in the end ?)
785
          */
786
          thd->cuted_fields++;
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
787
          push_warning_printf(thd, DRIZZLE_ERROR::WARN_LEVEL_WARN,
1 by brian
clean slate
788
                              ER_WARN_TOO_FEW_RECORDS,
789
                              ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
790
        }
791
        else if (item->type() == Item::STRING_ITEM)
792
        {
793
          ((Item_user_var_as_out_param *)item)->set_null_value(
794
                                                  read_info.read_charset);
795
        }
796
        else
797
        {
798
          my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
799
          return(1);
1 by brian
clean slate
800
        }
801
      }
802
    }
803
804
    if (thd->killed ||
805
        fill_record(thd, set_fields, set_values,
806
                    ignore_check_option_errors))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
807
      return(1);
1 by brian
clean slate
808
809
    err= write_record(thd, table, &info);
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
810
    table->auto_increment_field_not_null= false;
1 by brian
clean slate
811
    if (err)
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
812
      return(1);
1 by brian
clean slate
813
    /*
814
      We don't need to reset auto-increment field since we are restoring
815
      its default value at the beginning of each loop iteration.
816
    */
817
    if (read_info.next_line())			// Skip to next line
818
      break;
819
    if (read_info.line_cuted)
820
    {
821
      thd->cuted_fields++;			/* To long row */
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
822
      push_warning_printf(thd, DRIZZLE_ERROR::WARN_LEVEL_WARN, 
1 by brian
clean slate
823
                          ER_WARN_TOO_MANY_RECORDS, ER(ER_WARN_TOO_MANY_RECORDS), 
824
                          thd->row_count);   
825
      if (thd->killed)
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
826
        return(1);
1 by brian
clean slate
827
    }
828
    thd->row_count++;
829
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
830
  return(test(read_info.error));
1 by brian
clean slate
831
}
832
833
834
/****************************************************************************
835
** Read rows in xml format
836
****************************************************************************/
837
static int
838
read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
839
               List<Item> &fields_vars, List<Item> &set_fields,
840
               List<Item> &set_values, READ_INFO &read_info,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
841
               String &row_tag __attribute__((unused)),
77.1.46 by Monty Taylor
Finished the warnings work!
842
               ulong skip_lines,
1 by brian
clean slate
843
               bool ignore_check_option_errors)
844
{
845
  List_iterator_fast<Item> it(fields_vars);
846
  Item *item;
847
  TABLE *table= table_list->table;
848
  bool no_trans_update_stmt;
849
  CHARSET_INFO *cs= read_info.read_charset;
77.1.46 by Monty Taylor
Finished the warnings work!
850
1 by brian
clean slate
851
  no_trans_update_stmt= !table->file->has_transactions();
77.1.46 by Monty Taylor
Finished the warnings work!
852
1 by brian
clean slate
853
  for ( ; ; it.rewind())
854
  {
855
    if (thd->killed)
856
    {
857
      thd->send_kill_message();
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
858
      return(1);
1 by brian
clean slate
859
    }
860
    
861
    // read row tag and save values into tag list
862
    if (read_info.read_xml())
863
      break;
864
    
865
    List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
866
    xmlit.rewind();
867
    XML_TAG *tag= NULL;
868
    
869
    
870
    restore_record(table, s->default_values);
871
    
872
    while ((item= it++))
873
    {
874
      /* If this line is to be skipped we don't want to fill field or var */
875
      if (skip_lines)
876
        continue;
877
      
878
      /* find field in tag list */
879
      xmlit.rewind();
880
      tag= xmlit++;
881
      
882
      while(tag && strcmp(tag->field.c_ptr(), item->name) != 0)
883
        tag= xmlit++;
884
      
885
      if (!tag) // found null
886
      {
887
        if (item->type() == Item::FIELD_ITEM)
888
        {
889
          Field *field= ((Item_field *) item)->field;
890
          field->reset();
891
          field->set_null();
892
          if (field == table->next_number_field)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
893
            table->auto_increment_field_not_null= true;
1 by brian
clean slate
894
          if (!field->maybe_null())
895
          {
212.2.2 by Patrick Galbraith
Renamed FIELD_TYPE to DRIZZLE_TYPE
896
            if (field->type() == DRIZZLE_TYPE_TIMESTAMP)
1 by brian
clean slate
897
              ((Field_timestamp *) field)->set_time();
898
            else if (field != table->next_number_field)
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
899
              field->set_warning(DRIZZLE_ERROR::WARN_LEVEL_WARN,
1 by brian
clean slate
900
                                 ER_WARN_NULL_TO_NOTNULL, 1);
901
          }
902
        }
903
        else
904
          ((Item_user_var_as_out_param *) item)->set_null_value(cs);
905
        continue;
906
      }
907
908
      if (item->type() == Item::FIELD_ITEM)
909
      {
910
911
        Field *field= ((Item_field *)item)->field;
912
        field->set_notnull();
913
        if (field == table->next_number_field)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
914
          table->auto_increment_field_not_null= true;
1 by brian
clean slate
915
        field->store((char *) tag->value.ptr(), tag->value.length(), cs);
916
      }
917
      else
918
        ((Item_user_var_as_out_param *) item)->set_value(
919
                                                 (char *) tag->value.ptr(), 
920
                                                 tag->value.length(), cs);
921
    }
922
    
923
    if (read_info.error)
924
      break;
925
    
926
    if (skip_lines)
927
    {
928
      skip_lines--;
929
      continue;
930
    }
931
    
932
    if (item)
933
    {
934
      /* Have not read any field, thus input file is simply ended */
935
      if (item == fields_vars.head())
936
        break;
937
      
938
      for ( ; item; item= it++)
939
      {
940
        if (item->type() == Item::FIELD_ITEM)
941
        {
942
          /*
943
            QQ: We probably should not throw warning for each field.
944
            But how about intention to always have the same number
945
            of warnings in THD::cuted_fields (and get rid of cuted_fields
946
            in the end ?)
947
          */
948
          thd->cuted_fields++;
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
949
          push_warning_printf(thd, DRIZZLE_ERROR::WARN_LEVEL_WARN,
1 by brian
clean slate
950
                              ER_WARN_TOO_FEW_RECORDS,
951
                              ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
952
        }
953
        else
954
          ((Item_user_var_as_out_param *)item)->set_null_value(cs);
955
      }
956
    }
957
958
    if (thd->killed || fill_record(thd, set_fields, set_values,
959
                    ignore_check_option_errors))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
960
      return(1);
1 by brian
clean slate
961
962
    if (write_record(thd, table, &info))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
963
      return(1);
1 by brian
clean slate
964
    
965
    /*
966
      We don't need to reset auto-increment field since we are restoring
967
      its default value at the beginning of each loop iteration.
968
    */
969
    thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt;
970
    thd->row_count++;
971
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
972
  return(test(read_info.error));
1 by brian
clean slate
973
} /* load xml end */
974
975
976
/* Unescape all escape characters, mark \N as null */
977
978
char
979
READ_INFO::unescape(char chr)
980
{
981
  /* keep this switch synchornous with the ESCAPE_CHARS macro */
982
  switch(chr) {
983
  case 'n': return '\n';
984
  case 't': return '\t';
985
  case 'r': return '\r';
986
  case 'b': return '\b';
987
  case '0': return 0;				// Ascii null
988
  case 'Z': return '\032';			// Win32 end of file
989
  case 'N': found_null=1;
990
991
    /* fall through */
992
  default:  return chr;
993
  }
994
}
995
996
997
/*
998
  Read a line using buffering
999
  If last line is empty (in line mode) then it isn't outputed
1000
*/
1001
1002
1003
READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
1004
		     String &field_term, String &line_start, String &line_term,
1005
		     String &enclosed_par, int escape, bool get_it_from_net,
1006
		     bool is_fifo)
1007
  :file(file_par),escape_char(escape)
1008
{
1009
  read_charset= cs;
1010
  field_term_ptr=(char*) field_term.ptr();
1011
  field_term_length= field_term.length();
1012
  line_term_ptr=(char*) line_term.ptr();
1013
  line_term_length= line_term.length();
1014
  level= 0; /* for load xml */
1015
  if (line_start.length() == 0)
1016
  {
1017
    line_start_ptr=0;
1018
    start_of_line= 0;
1019
  }
1020
  else
1021
  {
1022
    line_start_ptr=(char*) line_start.ptr();
1023
    line_start_end=line_start_ptr+line_start.length();
1024
    start_of_line= 1;
1025
  }
1026
  /* If field_terminator == line_terminator, don't use line_terminator */
1027
  if (field_term_length == line_term_length &&
1028
      !memcmp(field_term_ptr,line_term_ptr,field_term_length))
1029
  {
1030
    line_term_length=0;
1031
    line_term_ptr=(char*) "";
1032
  }
1033
  enclosed_char= (enclosed_length=enclosed_par.length()) ?
1034
    (uchar) enclosed_par[0] : INT_MAX;
1035
  field_term_char= field_term_length ? (uchar) field_term_ptr[0] : INT_MAX;
1036
  line_term_char= line_term_length ? (uchar) line_term_ptr[0] : INT_MAX;
1037
  error=eof=found_end_of_line=found_null=line_cuted=0;
1038
  buff_length=tot_length;
1039
1040
1041
  /* Set of a stack for unget if long terminators */
1042
  uint length=max(field_term_length,line_term_length)+1;
1043
  set_if_bigger(length,line_start.length());
1044
  stack=stack_pos=(int*) sql_alloc(sizeof(int)*length);
1045
1046
  if (!(buffer=(uchar*) my_malloc(buff_length+1,MYF(0))))
1047
    error=1; /* purecov: inspected */
1048
  else
1049
  {
1050
    end_of_buff=buffer+buff_length;
1051
    if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0,
1052
		      (get_it_from_net) ? READ_NET :
1053
		      (is_fifo ? READ_FIFO : READ_CACHE),0L,1,
1054
		      MYF(MY_WME)))
1055
    {
1056
      my_free((uchar*) buffer,MYF(0)); /* purecov: inspected */
1057
      error=1;
1058
    }
1059
    else
1060
    {
1061
      /*
1062
	init_io_cache() will not initialize read_function member
1063
	if the cache is READ_NET. So we work around the problem with a
1064
	manual assignment
1065
      */
1066
      need_end_io_cache = 1;
1067
1068
      if (get_it_from_net)
1069
	cache.read_function = _my_b_net_read;
1070
1071
      if (mysql_bin_log.is_open())
1072
	cache.pre_read = cache.pre_close =
1073
	  (IO_CACHE_CALLBACK) log_loaded_block;
1074
    }
1075
  }
1076
}
1077
1078
1079
/*
  Release the IO_CACHE and the field buffer (only when `error` is not set)
  and delete every XML_TAG still held in taglist.

  NOTE(review): `error` doubles as the "nothing to release" marker.
  read_field() also sets error=1 when enlarging the buffer fails, at which
  point buffer and cache are still allocated, so that path appears to skip
  the clean-up below - confirm and track ownership separately if so.
*/
READ_INFO::~READ_INFO()
{
  if (!error)
  {
    if (need_end_io_cache)
      ::end_io_cache(&cache);
    my_free((uchar*) buffer,MYF(0));
    error=1;
  }

  List_iterator<XML_TAG> xmlit(taglist);
  XML_TAG *t;
  while ((t= xmlit++))
    delete(t);
}
1093
1094
1095
/*
  Character input with push-back, used by the READ_INFO readers.
  GET yields the most recently pushed value if the unget stack is not
  empty, otherwise the next value from my_b_get(&cache).
  PUSH(A) stores A on the unget stack; it is parenthesised so that it
  expands to a single expression.
*/
#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache))
#define PUSH(A) (*(stack_pos++)= (A))
1097
1098
1099
inline int READ_INFO::terminator(char *ptr,uint length)
1100
{
1101
  int chr=0;					// Keep gcc happy
1102
  uint i;
1103
  for (i=1 ; i < length ; i++)
1104
  {
1105
    if ((chr=GET) != *++ptr)
1106
    {
1107
      break;
1108
    }
1109
  }
1110
  if (i == length)
1111
    return 1;
1112
  PUSH(chr);
1113
  while (i-- > 1)
1114
    PUSH((uchar) *--ptr);
1115
  return 0;
1116
}
1117
1118
1119
int READ_INFO::read_field()
1120
{
1121
  int chr,found_enclosed_char;
1122
  uchar *to,*new_buffer;
1123
1124
  found_null=0;
1125
  if (found_end_of_line)
1126
    return 1;					// One have to call next_line
1127
1128
  /* Skip until we find 'line_start' */
1129
1130
  if (start_of_line)
1131
  {						// Skip until line_start
1132
    start_of_line=0;
1133
    if (find_start_of_fields())
1134
      return 1;
1135
  }
1136
  if ((chr=GET) == my_b_EOF)
1137
  {
1138
    found_end_of_line=eof=1;
1139
    return 1;
1140
  }
1141
  to=buffer;
1142
  if (chr == enclosed_char)
1143
  {
1144
    found_enclosed_char=enclosed_char;
1145
    *to++=(uchar) chr;				// If error
1146
  }
1147
  else
1148
  {
1149
    found_enclosed_char= INT_MAX;
1150
    PUSH(chr);
1151
  }
1152
1153
  for (;;)
1154
  {
1155
    while ( to < end_of_buff)
1156
    {
1157
      chr = GET;
1158
#ifdef USE_MB
1159
      if ((my_mbcharlen(read_charset, chr) > 1) &&
1160
          to+my_mbcharlen(read_charset, chr) <= end_of_buff)
1161
      {
1162
	  uchar* p = (uchar*)to;
1163
	  *to++ = chr;
1164
	  int ml = my_mbcharlen(read_charset, chr);
1165
	  int i;
1166
	  for (i=1; i<ml; i++) {
1167
	      chr = GET;
1168
	      if (chr == my_b_EOF)
1169
		  goto found_eof;
1170
	      *to++ = chr;
1171
	  }
1172
	  if (my_ismbchar(read_charset,
1173
                          (const char *)p,
1174
                          (const char *)to))
1175
	    continue;
1176
	  for (i=0; i<ml; i++)
1177
	    PUSH((uchar) *--to);
1178
	  chr = GET;
1179
      }
1180
#endif
1181
      if (chr == my_b_EOF)
1182
	goto found_eof;
1183
      if (chr == escape_char)
1184
      {
1185
	if ((chr=GET) == my_b_EOF)
1186
	{
1187
	  *to++= (uchar) escape_char;
1188
	  goto found_eof;
1189
	}
1190
        /*
1191
          When escape_char == enclosed_char, we treat it like we do for
1192
          handling quotes in SQL parsing -- you can double-up the
1193
          escape_char to include it literally, but it doesn't do escapes
1194
          like \n. This allows: LOAD DATA ... ENCLOSED BY '"' ESCAPED BY '"'
1195
          with data like: "fie""ld1", "field2"
1196
         */
1197
        if (escape_char != enclosed_char || chr == escape_char)
1198
        {
1199
          *to++ = (uchar) unescape((char) chr);
1200
          continue;
1201
        }
1202
        PUSH(chr);
1203
        chr= escape_char;
1204
      }
1205
#ifdef ALLOW_LINESEPARATOR_IN_STRINGS
1206
      if (chr == line_term_char)
1207
#else
1208
      if (chr == line_term_char && found_enclosed_char == INT_MAX)
1209
#endif
1210
      {
1211
	if (terminator(line_term_ptr,line_term_length))
1212
	{					// Maybe unexpected linefeed
1213
	  enclosed=0;
1214
	  found_end_of_line=1;
1215
	  row_start=buffer;
1216
	  row_end=  to;
1217
	  return 0;
1218
	}
1219
      }
1220
      if (chr == found_enclosed_char)
1221
      {
1222
	if ((chr=GET) == found_enclosed_char)
1223
	{					// Remove dupplicated
1224
	  *to++ = (uchar) chr;
1225
	  continue;
1226
	}
1227
	// End of enclosed field if followed by field_term or line_term
1228
	if (chr == my_b_EOF ||
1229
	    (chr == line_term_char && terminator(line_term_ptr, line_term_length)))
1230
	{					// Maybe unexpected linefeed
1231
	  enclosed=1;
1232
	  found_end_of_line=1;
1233
	  row_start=buffer+1;
1234
	  row_end=  to;
1235
	  return 0;
1236
	}
1237
	if (chr == field_term_char &&
1238
	    terminator(field_term_ptr,field_term_length))
1239
	{
1240
	  enclosed=1;
1241
	  row_start=buffer+1;
1242
	  row_end=  to;
1243
	  return 0;
1244
	}
1245
	/*
1246
	  The string didn't terminate yet.
1247
	  Store back next character for the loop
1248
	*/
1249
	PUSH(chr);
1250
	/* copy the found term character to 'to' */
1251
	chr= found_enclosed_char;
1252
      }
1253
      else if (chr == field_term_char && found_enclosed_char == INT_MAX)
1254
      {
1255
	if (terminator(field_term_ptr,field_term_length))
1256
	{
1257
	  enclosed=0;
1258
	  row_start=buffer;
1259
	  row_end=  to;
1260
	  return 0;
1261
	}
1262
      }
1263
      *to++ = (uchar) chr;
1264
    }
1265
    /*
1266
    ** We come here if buffer is too small. Enlarge it and continue
1267
    */
1268
    if (!(new_buffer=(uchar*) my_realloc((char*) buffer,buff_length+1+IO_SIZE,
1269
					MYF(MY_WME))))
1270
      return (error=1);
1271
    to=new_buffer + (to-buffer);
1272
    buffer=new_buffer;
1273
    buff_length+=IO_SIZE;
1274
    end_of_buff=buffer+buff_length;
1275
  }
1276
1277
found_eof:
1278
  enclosed=0;
1279
  found_end_of_line=eof=1;
1280
  row_start=buffer;
1281
  row_end=to;
1282
  return 0;
1283
}
1284
1285
/*
1286
  Read a row with fixed length.
1287
1288
  NOTES
1289
    The row may not be fixed size on disk if there are escape
1290
    characters in the file.
1291
1292
  IMPLEMENTATION NOTE
1293
    One can't use fixed length with multi-byte charset **
1294
1295
  RETURN
1296
    0  ok
1297
    1  error
1298
*/
1299
1300
int READ_INFO::read_fixed_length()
1301
{
1302
  int chr;
1303
  uchar *to;
1304
  if (found_end_of_line)
1305
    return 1;					// One have to call next_line
1306
1307
  if (start_of_line)
1308
  {						// Skip until line_start
1309
    start_of_line=0;
1310
    if (find_start_of_fields())
1311
      return 1;
1312
  }
1313
1314
  to=row_start=buffer;
1315
  while (to < end_of_buff)
1316
  {
1317
    if ((chr=GET) == my_b_EOF)
1318
      goto found_eof;
1319
    if (chr == escape_char)
1320
    {
1321
      if ((chr=GET) == my_b_EOF)
1322
      {
1323
	*to++= (uchar) escape_char;
1324
	goto found_eof;
1325
      }
1326
      *to++ =(uchar) unescape((char) chr);
1327
      continue;
1328
    }
1329
    if (chr == line_term_char)
1330
    {
1331
      if (terminator(line_term_ptr,line_term_length))
1332
      {						// Maybe unexpected linefeed
1333
	found_end_of_line=1;
1334
	row_end=  to;
1335
	return 0;
1336
      }
1337
    }
1338
    *to++ = (uchar) chr;
1339
  }
1340
  row_end=to;					// Found full line
1341
  return 0;
1342
1343
found_eof:
1344
  found_end_of_line=eof=1;
1345
  row_start=buffer;
1346
  row_end=to;
1347
  return to == buffer ? 1 : 0;
1348
}
1349
1350
1351
int READ_INFO::next_line()
1352
{
1353
  line_cuted=0;
1354
  start_of_line= line_start_ptr != 0;
1355
  if (found_end_of_line || eof)
1356
  {
1357
    found_end_of_line=0;
1358
    return eof;
1359
  }
1360
  found_end_of_line=0;
1361
  if (!line_term_length)
1362
    return 0;					// No lines
1363
  for (;;)
1364
  {
1365
    int chr = GET;
1366
#ifdef USE_MB
1367
   if (my_mbcharlen(read_charset, chr) > 1)
1368
   {
1369
       for (uint i=1;
1370
            chr != my_b_EOF && i<my_mbcharlen(read_charset, chr);
1371
            i++)
1372
	   chr = GET;
1373
       if (chr == escape_char)
1374
	   continue;
1375
   }
1376
#endif
1377
   if (chr == my_b_EOF)
1378
   {
1379
      eof=1;
1380
      return 1;
1381
    }
1382
    if (chr == escape_char)
1383
    {
1384
      line_cuted=1;
1385
      if (GET == my_b_EOF)
1386
	return 1;
1387
      continue;
1388
    }
1389
    if (chr == line_term_char && terminator(line_term_ptr,line_term_length))
1390
      return 0;
1391
    line_cuted=1;
1392
  }
1393
}
1394
1395
1396
bool READ_INFO::find_start_of_fields()
1397
{
1398
  int chr;
1399
 try_again:
1400
  do
1401
  {
1402
    if ((chr=GET) == my_b_EOF)
1403
    {
1404
      found_end_of_line=eof=1;
1405
      return 1;
1406
    }
1407
  } while ((char) chr != line_start_ptr[0]);
1408
  for (char *ptr=line_start_ptr+1 ; ptr != line_start_end ; ptr++)
1409
  {
1410
    chr=GET;					// Eof will be checked later
1411
    if ((char) chr != *ptr)
1412
    {						// Can't be line_start
1413
      PUSH(chr);
1414
      while (--ptr != line_start_ptr)
1415
      {						// Restart with next char
1416
	PUSH((uchar) *ptr);
1417
      }
1418
      goto try_again;
1419
    }
1420
  }
1421
  return 0;
1422
}
1423
1424
1425
/*
1426
  Clear taglist from tags with a specified level
1427
*/
1428
int READ_INFO::clear_level(int level)
1429
{
1430
  List_iterator<XML_TAG> xmlit(taglist);
1431
  xmlit.rewind();
1432
  XML_TAG *tag;
1433
  
1434
  while ((tag= xmlit++))
1435
  {
1436
     if(tag->level >= level)
1437
     {
1438
       xmlit.remove();
1439
       delete tag;
1440
     }
1441
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1442
  return(0);
1 by brian
clean slate
1443
}
1444
1445
1446
/*
  Convert an XML entity to Unicode value.

  name/length give the entity name without the leading '&' and the
  trailing ';'; name need not be NUL-terminated.  The five predefined
  entities gt, lt, amp, quot and apos are recognised.

  Return -1 on error;
*/
static int
my_xml_entity_to_char(const char *name, uint length)
{
  static const struct
  {
    const char *name;
    uint length;
    int chr;
  } entities[]=
  {
    { "gt",   2, '>'  },
    { "lt",   2, '<'  },
    { "amp",  3, '&'  },
    { "quot", 4, '"'  },
    { "apos", 4, '\'' }
  };

  for (uint i= 0; i < sizeof(entities) / sizeof(entities[0]); i++)
  {
    if (length == entities[i].length &&
        !memcmp(name, entities[i].name, length))
      return entities[i].chr;
  }
  return -1;
}
1474
1475
1476
/**
  @brief Convert carriage return, line feed and tab to space

  @param chr    character

  @return ' ' when chr is '\t', '\r' or '\n'; otherwise chr unchanged

  @details According to the "XML 1.0" standard,
           only space (#x20) characters, carriage returns,
           line feeds or tabs are considered as spaces.
           Convert all of them to space (#x20) for parsing simplicity.
*/
static int
my_tospace(int chr)
{
  switch (chr) {
  case '\t':
  case '\r':
  case '\n':
    return ' ';
  default:
    return chr;
  }
}
1491
1492
1493
/*
1494
  Read an xml value: handle multibyte and xml escape
1495
*/
1496
int READ_INFO::read_value(int delim, String *val)
1497
{
1498
  int chr;
1499
  String tmp;
1500
1501
  for (chr= my_tospace(GET); chr != delim && chr != my_b_EOF; )
1502
  {
1503
#ifdef USE_MB
1504
    if (my_mbcharlen(read_charset, chr) > 1)
1505
    {
1506
      int i, ml= my_mbcharlen(read_charset, chr);
1507
      for (i= 1; i < ml; i++) 
1508
      {
1509
        val->append(chr);
1510
        /*
1511
          Don't use my_tospace() in the middle of a multi-byte character
1512
          TODO: check that the multi-byte sequence is valid.
1513
        */
1514
        chr= GET; 
1515
        if (chr == my_b_EOF)
1516
          return chr;
1517
      }
1518
    }
1519
#endif
1520
    if(chr == '&')
1521
    {
1522
      tmp.length(0);
1523
      for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET))
1524
      {
1525
        if (chr == my_b_EOF)
1526
          return chr;
1527
        tmp.append(chr);
1528
      }
1529
      if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
1530
        val->append(chr);
1531
      else
1532
      {
1533
        val->append('&');
1534
        val->append(tmp);
1535
        val->append(';'); 
1536
      }
1537
    }
1538
    else
1539
      val->append(chr);
1540
    chr= my_tospace(GET);
1541
  }            
1542
  return chr;
1543
}
1544
1545
1546
/*
1547
  Read a record in xml format
1548
  tags and attributes are stored in taglist
1549
  when tag set in ROWS IDENTIFIED BY is closed, we are ready and return
1550
*/
1551
int READ_INFO::read_xml()
1552
{
1553
  int chr, chr2, chr3;
1554
  int delim= 0;
1555
  String tag, attribute, value;
1556
  bool in_tag= false;
1557
  
1558
  tag.length(0);
1559
  attribute.length(0);
1560
  value.length(0);
1561
  
1562
  for (chr= my_tospace(GET); chr != my_b_EOF ; )
1563
  {
1564
    switch(chr){
1565
    case '<':  /* read tag */
1566
        /* TODO: check if this is a comment <!-- comment -->  */
1567
      chr= my_tospace(GET);
1568
      if(chr == '!')
1569
      {
1570
        chr2= GET;
1571
        chr3= GET;
1572
        
1573
        if(chr2 == '-' && chr3 == '-')
1574
        {
1575
          chr2= 0;
1576
          chr3= 0;
1577
          chr= my_tospace(GET);
1578
          
1579
          while(chr != '>' || chr2 != '-' || chr3 != '-')
1580
          {
1581
            if(chr == '-')
1582
            {
1583
              chr3= chr2;
1584
              chr2= chr;
1585
            }
1586
            else if (chr2 == '-')
1587
            {
1588
              chr2= 0;
1589
              chr3= 0;
1590
            }
1591
            chr= my_tospace(GET);
1592
            if (chr == my_b_EOF)
1593
              goto found_eof;
1594
          }
1595
          break;
1596
        }
1597
      }
1598
      
1599
      tag.length(0);
1600
      while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF)
1601
      {
1602
        if(chr != delim) /* fix for the '<field name =' format */
1603
          tag.append(chr);
1604
        chr= my_tospace(GET);
1605
      }
1606
      
1607
      if(chr == ' ' || chr == '>')
1608
      {
1609
        level++;
1610
        clear_level(level + 1);
1611
      }
1612
      
1613
      if (chr == ' ')
1614
        in_tag= true;
1615
      else 
1616
        in_tag= false;
1617
      break;
1618
      
1619
    case ' ': /* read attribute */
1620
      while(chr == ' ')  /* skip blanks */
1621
        chr= my_tospace(GET);
1622
      
1623
      if(!in_tag)
1624
        break;
1625
      
1626
      while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF)
1627
      {
1628
        attribute.append(chr);
1629
        chr= my_tospace(GET);
1630
      }
1631
      break;
1632
      
1633
    case '>': /* end tag - read tag value */
1634
      in_tag= false;
1635
      chr= read_value('<', &value);
1636
      if(chr == my_b_EOF)
1637
        goto found_eof;
1638
      
1639
      /* save value to list */
1640
      if(tag.length() > 0 && value.length() > 0)
1641
        taglist.push_front( new XML_TAG(level, tag, value));
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1642
1 by brian
clean slate
1643
      tag.length(0);
1644
      value.length(0);
1645
      attribute.length(0);
1646
      break;
1647
      
1648
    case '/': /* close tag */
1649
      level--;
1650
      chr= my_tospace(GET);
1651
      if(chr != '>')   /* if this is an empty tag <tag   /> */
1652
        tag.length(0); /* we should keep tag value          */
1653
      while(chr != '>' && chr != my_b_EOF)
1654
      {
1655
        tag.append(chr);
1656
        chr= my_tospace(GET);
1657
      }
1658
      
1659
      if((tag.length() == line_term_length -2) &&
1660
         (strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1661
         return(0); //normal return
1662
1 by brian
clean slate
1663
      chr= my_tospace(GET);
1664
      break;   
1665
      
1666
    case '=': /* attribute name end - read the value */
1667
      //check for tag field and attribute name
1668
      if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
1669
         !memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name")))
1670
      {
1671
        /*
1672
          this is format <field name="xx">xx</field>
1673
          where actual fieldname is in attribute
1674
        */
1675
        delim= my_tospace(GET);
1676
        tag.length(0);
1677
        attribute.length(0);
1678
        chr= '<'; /* we pretend that it is a tag */
1679
        level--;
1680
        break;
1681
      }
1682
      
1683
      //check for " or '
1684
      chr= GET;
1685
      if (chr == my_b_EOF)
1686
        goto found_eof;
1687
      if(chr == '"' || chr == '\'')
1688
      {
1689
        delim= chr;
1690
      }
1691
      else
1692
      {
1693
        delim= ' '; /* no delimiter, use space */
1694
        PUSH(chr);
1695
      }
1696
      
1697
      chr= read_value(delim, &value);
1698
      if(attribute.length() > 0 && value.length() > 0)
1699
        taglist.push_front(new XML_TAG(level + 1, attribute, value));
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1700
1 by brian
clean slate
1701
      attribute.length(0);
1702
      value.length(0);
1703
      if (chr != ' ')
1704
        chr= my_tospace(GET);
1705
      break;
1706
    
1707
    default:
1708
      chr= my_tospace(GET);
1709
    } /* end switch */
1710
  } /* end while */
1711
  
1712
found_eof:
1713
  eof= 1;
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1714
  return(1);
1 by brian
clean slate
1715
}