~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000-2006 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
17
/* Copy data from a textfile to table */
18
/* 2006-12 Erik Wetterberg : LOAD XML added */
19
20
#include "mysql_priv.h"
21
#include <my_dir.h>
22
#include <m_ctype.h>
23
#include "sql_repl.h"
24
25
class XML_TAG {
26
public:
27
  int level;
28
  String field;
29
  String value;
30
  XML_TAG(int l, String f, String v);
31
};
32
33
34
/*
  Store the level and append the given name and value to this object's own
  (initially empty) String members.
*/
XML_TAG::XML_TAG(int l, String f, String v)
  : level(l)
{
  field.append(f);
  value.append(v);
}
40
41
42
class READ_INFO {
43
  File	file;
44
  uchar	*buffer,			/* Buffer for read text */
45
	*end_of_buff;			/* Data in bufferts ends here */
46
  uint	buff_length,			/* Length of buffert */
47
	max_length;			/* Max length of row */
48
  char	*field_term_ptr,*line_term_ptr,*line_start_ptr,*line_start_end;
49
  uint	field_term_length,line_term_length,enclosed_length;
50
  int	field_term_char,line_term_char,enclosed_char,escape_char;
51
  int	*stack,*stack_pos;
52
  bool	found_end_of_line,start_of_line,eof;
53
  bool  need_end_io_cache;
54
  IO_CACHE cache;
55
  NET *io_net;
56
  int level; /* for load xml */
57
58
public:
59
  bool error,line_cuted,found_null,enclosed;
60
  uchar	*row_start,			/* Found row starts here */
61
	*row_end;			/* Found row ends here */
62
  CHARSET_INFO *read_charset;
63
64
  READ_INFO(File file,uint tot_length,CHARSET_INFO *cs,
65
	    String &field_term,String &line_start,String &line_term,
66
	    String &enclosed,int escape,bool get_it_from_net, bool is_fifo);
67
  ~READ_INFO();
68
  int read_field();
69
  int read_fixed_length(void);
70
  int next_line(void);
71
  char unescape(char chr);
72
  int terminator(char *ptr,uint length);
73
  bool find_start_of_fields();
74
  /* load xml */
75
  List<XML_TAG> taglist;
76
  int read_value(int delim, String *val);
77
  int read_xml();
78
  int clear_level(int level);
79
80
  /*
81
    We need to force cache close before destructor is invoked to log
82
    the last read block
83
  */
84
  void end_io_cache()
85
  {
86
    ::end_io_cache(&cache);
87
    need_end_io_cache = 0;
88
  }
89
90
  /*
91
    Either this method, or we need to make cache public
92
    Arg must be set from mysql_load() since constructor does not see
93
    either the table or THD value
94
  */
95
  void set_io_cache_arg(void* arg) { cache.arg = arg; }
96
};
97
98
static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
99
                             List<Item> &fields_vars, List<Item> &set_fields,
100
                             List<Item> &set_values, READ_INFO &read_info,
101
			     ulong skip_lines,
102
			     bool ignore_check_option_errors);
103
static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
104
                          List<Item> &fields_vars, List<Item> &set_fields,
105
                          List<Item> &set_values, READ_INFO &read_info,
106
			  String &enclosed, ulong skip_lines,
107
			  bool ignore_check_option_errors);
108
109
static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
110
                          List<Item> &fields_vars, List<Item> &set_fields,
111
                          List<Item> &set_values, READ_INFO &read_info,
112
                          String &enclosed, ulong skip_lines,
113
                          bool ignore_check_option_errors);
114
115
static bool write_execute_load_query_log_event(THD *thd,
116
					       bool duplicates, bool ignore,
117
					       bool transactional_table,
118
                                               THD::killed_state killed_status);
119
120
/*
121
  Execute LOAD DATA query
122
123
  SYNOPSYS
124
    mysql_load()
125
      thd - current thread
126
      ex  - sql_exchange object representing source file and its parsing rules
127
      table_list  - list of tables to which we are loading data
128
      fields_vars - list of fields and variables to which we read
129
                    data from file
130
      set_fields  - list of fields mentioned in set clause
131
      set_values  - expressions to assign to fields in previous list
132
      handle_duplicates - indicates whenever we should emit error or
133
                          replace row if we will meet duplicates.
134
      ignore -          - indicates whenever we should ignore duplicates
135
      read_file_from_client - is this LOAD DATA LOCAL ?
136
137
  RETURN VALUES
138
    TRUE - error / FALSE - success
139
*/
140
141
int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
142
	        List<Item> &fields_vars, List<Item> &set_fields,
143
                List<Item> &set_values,
144
                enum enum_duplicates handle_duplicates, bool ignore,
145
                bool read_file_from_client)
146
{
147
  char name[FN_REFLEN];
148
  File file;
149
  TABLE *table= NULL;
150
  int error;
151
  String *field_term=ex->field_term,*escaped=ex->escaped;
152
  String *enclosed=ex->enclosed;
153
  bool is_fifo=0;
154
  LOAD_FILE_INFO lf_info;
155
  char *db = table_list->db;			// This is never null
156
  /*
157
    If path for file is not defined, we will use the current database.
158
    If this is not set, we will use the directory where the table to be
159
    loaded is located
160
  */
161
  char *tdb= thd->db ? thd->db : db;		// Result is never null
162
  ulong skip_lines= ex->skip_lines;
163
  bool transactional_table;
164
  THD::killed_state killed_status= THD::NOT_KILLED;
165
166
  if (escaped->length() > 1 || enclosed->length() > 1)
167
  {
168
    my_message(ER_WRONG_FIELD_TERMINATORS,ER(ER_WRONG_FIELD_TERMINATORS),
169
	       MYF(0));
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
170
    return(TRUE);
1 by brian
clean slate
171
  }
172
  if (open_and_lock_tables(thd, table_list))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
173
    return(TRUE);
1 by brian
clean slate
174
  if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
175
                                    &thd->lex->select_lex.top_join_list,
176
                                    table_list,
177
                                    &thd->lex->select_lex.leaf_tables, true))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
178
     return(-1);
1 by brian
clean slate
179
180
  /*
181
    Let us emit an error if we are loading data to table which is used
182
    in subselect in SET clause like we do it for INSERT.
183
184
    The main thing to fix to remove this restriction is to ensure that the
185
    table is marked to be 'used for insert' in which case we should never
186
    mark this table as 'const table' (ie, one that has only one row).
187
  */
188
  if (unique_table(thd, table_list, table_list->next_global, 0))
189
  {
190
    my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->table_name);
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
191
    return(TRUE);
1 by brian
clean slate
192
  }
193
194
  table= table_list->table;
195
  transactional_table= table->file->has_transactions();
196
197
  if (!fields_vars.elements)
198
  {
199
    Field **field;
200
    for (field=table->field; *field ; field++)
201
      fields_vars.push_back(new Item_field(*field));
202
    bitmap_set_all(table->write_set);
203
    table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
204
    /*
205
      Let us also prepare SET clause, altough it is probably empty
206
      in this case.
207
    */
208
    if (setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
209
        setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
210
      return(TRUE);
1 by brian
clean slate
211
  }
212
  else
213
  {						// Part field list
214
    /* TODO: use this conds for 'WITH CHECK OPTIONS' */
215
    if (setup_fields(thd, 0, fields_vars, MARK_COLUMNS_WRITE, 0, 0) ||
216
        setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
217
        check_that_all_fields_are_given_values(thd, table, table_list))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
218
      return(TRUE);
1 by brian
clean slate
219
    /*
220
      Check whenever TIMESTAMP field with auto-set feature specified
221
      explicitly.
222
    */
223
    if (table->timestamp_field)
224
    {
225
      if (bitmap_is_set(table->write_set,
226
                        table->timestamp_field->field_index))
227
        table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
228
      else
229
      {
230
        bitmap_set_bit(table->write_set,
231
                       table->timestamp_field->field_index);
232
      }
233
    }
234
    /* Fix the expressions in SET clause */
235
    if (setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
236
      return(TRUE);
1 by brian
clean slate
237
  }
238
239
  table->mark_columns_needed_for_insert();
240
241
  uint tot_length=0;
242
  bool use_blobs= 0, use_vars= 0;
243
  List_iterator_fast<Item> it(fields_vars);
244
  Item *item;
245
246
  while ((item= it++))
247
  {
248
    Item *real_item= item->real_item();
249
250
    if (real_item->type() == Item::FIELD_ITEM)
251
    {
252
      Field *field= ((Item_field*)real_item)->field;
253
      if (field->flags & BLOB_FLAG)
254
      {
255
        use_blobs= 1;
256
        tot_length+= 256;			// Will be extended if needed
257
      }
258
      else
259
        tot_length+= field->field_length;
260
    }
261
    else if (item->type() == Item::STRING_ITEM)
262
      use_vars= 1;
263
  }
264
  if (use_blobs && !ex->line_term->length() && !field_term->length())
265
  {
266
    my_message(ER_BLOBS_AND_NO_TERMINATED,ER(ER_BLOBS_AND_NO_TERMINATED),
267
	       MYF(0));
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
268
    return(TRUE);
1 by brian
clean slate
269
  }
270
  if (use_vars && !field_term->length() && !enclosed->length())
271
  {
272
    my_error(ER_LOAD_FROM_FIXED_SIZE_ROWS_TO_VAR, MYF(0));
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
273
    return(TRUE);
1 by brian
clean slate
274
  }
275
276
  /* We can't give an error in the middle when using LOCAL files */
277
  if (read_file_from_client && handle_duplicates == DUP_ERROR)
278
    ignore= 1;
279
280
  if (read_file_from_client)
281
  {
282
    (void)net_request_file(&thd->net,ex->file_name);
283
    file = -1;
284
  }
285
  else
286
  {
287
#ifdef DONT_ALLOW_FULL_LOAD_DATA_PATHS
288
    ex->file_name+=dirname_length(ex->file_name);
289
#endif
290
    if (!dirname_length(ex->file_name))
291
    {
292
      strxnmov(name, FN_REFLEN-1, mysql_real_data_home, tdb, NullS);
293
      (void) fn_format(name, ex->file_name, name, "",
294
		       MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
295
    }
296
    else
297
    {
298
      (void) fn_format(name, ex->file_name, mysql_real_data_home, "",
299
		       MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
300
301
      if (opt_secure_file_priv &&
302
          strncmp(opt_secure_file_priv, name, strlen(opt_secure_file_priv)))
303
      {
304
        /* Read only allowed from within dir specified by secure_file_priv */
305
        my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--secure-file-priv");
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
306
        return(TRUE);
1 by brian
clean slate
307
      }
308
15 by brian
Fix for stat, NETWARE removal
309
      struct stat stat_info;
310
      if (stat(name,&stat_info))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
311
	return(TRUE);
1 by brian
clean slate
312
313
      // if we are not in slave thread, the file must be:
314
      if (!thd->slave_thread &&
315
	  !((stat_info.st_mode & S_IROTH) == S_IROTH &&  // readable by others
316
	    (stat_info.st_mode & S_IFLNK) != S_IFLNK && // and not a symlink
317
	    ((stat_info.st_mode & S_IFREG) == S_IFREG ||
318
	     (stat_info.st_mode & S_IFIFO) == S_IFIFO)))
319
      {
320
	my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), name);
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
321
	return(TRUE);
1 by brian
clean slate
322
      }
323
      if ((stat_info.st_mode & S_IFIFO) == S_IFIFO)
324
	is_fifo = 1;
325
    }
326
    if ((file=my_open(name,O_RDONLY,MYF(MY_WME))) < 0)
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
327
      return(TRUE);
1 by brian
clean slate
328
  }
329
330
  COPY_INFO info;
331
  bzero((char*) &info,sizeof(info));
332
  info.ignore= ignore;
333
  info.handle_duplicates=handle_duplicates;
334
  info.escape_char=escaped->length() ? (*escaped)[0] : INT_MAX;
335
336
  READ_INFO read_info(file,tot_length,
337
                      ex->cs ? ex->cs : thd->variables.collation_database,
338
		      *field_term,*ex->line_start, *ex->line_term, *enclosed,
339
		      info.escape_char, read_file_from_client, is_fifo);
340
  if (read_info.error)
341
  {
342
    if	(file >= 0)
343
      my_close(file,MYF(0));			// no files in net reading
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
344
    return(TRUE);				// Can't allocate buffers
1 by brian
clean slate
345
  }
346
347
  if (mysql_bin_log.is_open())
348
  {
349
    lf_info.thd = thd;
350
    lf_info.wrote_create_file = 0;
351
    lf_info.last_pos_in_file = HA_POS_ERROR;
352
    lf_info.log_delayed= transactional_table;
353
    read_info.set_io_cache_arg((void*) &lf_info);
354
  }
355
356
  thd->count_cuted_fields= CHECK_FIELD_WARN;		/* calc cuted fields */
357
  thd->cuted_fields=0L;
358
  /* Skip lines if there is a line terminator */
359
  if (ex->line_term->length() && ex->filetype != FILETYPE_XML)
360
  {
361
    /* ex->skip_lines needs to be preserved for logging */
362
    while (skip_lines > 0)
363
    {
364
      skip_lines--;
365
      if (read_info.next_line())
366
	break;
367
    }
368
  }
369
370
  if (!(error=test(read_info.error)))
371
  {
372
373
    table->next_number_field=table->found_next_number_field;
374
    if (ignore ||
375
	handle_duplicates == DUP_REPLACE)
376
      table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
377
    if (handle_duplicates == DUP_REPLACE)
378
        table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE);
379
    table->file->ha_start_bulk_insert((ha_rows) 0);
380
    table->copy_blobs=1;
381
382
    thd->abort_on_warning= (!ignore &&
383
                            (thd->variables.sql_mode &
384
                             (MODE_STRICT_TRANS_TABLES |
385
                              MODE_STRICT_ALL_TABLES)));
386
387
    if (ex->filetype == FILETYPE_XML) /* load xml */
388
      error= read_xml_field(thd, info, table_list, fields_vars,
389
                            set_fields, set_values, read_info,
390
                            *(ex->line_term), skip_lines, ignore);
391
    else if (!field_term->length() && !enclosed->length())
392
      error= read_fixed_length(thd, info, table_list, fields_vars,
393
                               set_fields, set_values, read_info,
394
			       skip_lines, ignore);
395
    else
396
      error= read_sep_field(thd, info, table_list, fields_vars,
397
                            set_fields, set_values, read_info,
398
			    *enclosed, skip_lines, ignore);
399
    if (table->file->ha_end_bulk_insert() && !error)
400
    {
401
      table->file->print_error(my_errno, MYF(0));
402
      error= 1;
403
    }
404
    table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
405
    table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
406
    table->next_number_field=0;
407
  }
408
  if (file >= 0)
409
    my_close(file,MYF(0));
410
  free_blobs(table);				/* if pack_blob was used */
411
  table->copy_blobs=0;
412
  thd->count_cuted_fields= CHECK_FIELD_IGNORE;
413
  /* 
414
     simulated killing in the middle of per-row loop
415
     must be effective for binlogging
416
  */
417
  DBUG_EXECUTE_IF("simulate_kill_bug27571",
418
                  {
419
                    error=1;
420
                    thd->killed= THD::KILL_QUERY;
421
                  };);
422
  killed_status= (error == 0)? THD::NOT_KILLED : thd->killed;
423
  if (error)
424
  {
425
    if (read_file_from_client)
426
      while (!read_info.next_line())
427
	;
428
429
    if (mysql_bin_log.is_open())
430
    {
431
      {
432
	/*
433
	  Make sure last block (the one which caused the error) gets
434
	  logged.  This is needed because otherwise after write of (to
435
	  the binlog, not to read_info (which is a cache))
436
	  Delete_file_log_event the bad block will remain in read_info
437
	  (because pre_read is not called at the end of the last
438
	  block; remember pre_read is called whenever a new block is
439
	  read from disk).  At the end of mysql_load(), the destructor
440
	  of read_info will call end_io_cache() which will flush
441
	  read_info, so we will finally have this in the binlog:
442
443
	  Append_block # The last successfull block
444
	  Delete_file
445
	  Append_block # The failing block
446
	  which is nonsense.
447
	  Or could also be (for a small file)
448
	  Create_file  # The failing block
449
	  which is nonsense (Delete_file is not written in this case, because:
450
	  Create_file has not been written, so Delete_file is not written, then
451
	  when read_info is destroyed end_io_cache() is called which writes
452
	  Create_file.
453
	*/
454
	read_info.end_io_cache();
455
	/* If the file was not empty, wrote_create_file is true */
456
	if (lf_info.wrote_create_file)
457
	{
458
	  if (thd->transaction.stmt.modified_non_trans_table)
459
	    write_execute_load_query_log_event(thd, handle_duplicates,
460
					       ignore, transactional_table,
461
                                               killed_status);
462
	  else
463
	  {
464
	    Delete_file_log_event d(thd, db, transactional_table);
465
            d.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
466
	    mysql_bin_log.write(&d);
467
	  }
468
	}
469
      }
470
    }
471
    error= -1;				// Error on read
472
    goto err;
473
  }
474
  sprintf(name, ER(ER_LOAD_INFO), (ulong) info.records, (ulong) info.deleted,
475
	  (ulong) (info.records - info.copied), (ulong) thd->cuted_fields);
476
477
  if (thd->transaction.stmt.modified_non_trans_table)
478
    thd->transaction.all.modified_non_trans_table= TRUE;
479
480
  if (mysql_bin_log.is_open())
481
  {
482
    /*
483
      We need to do the job that is normally done inside
484
      binlog_query() here, which is to ensure that the pending event
485
      is written before tables are unlocked and before any other
486
      events are written.  We also need to update the table map
487
      version for the binary log to mark that table maps are invalid
488
      after this point.
489
     */
490
    if (thd->current_stmt_binlog_row_based)
491
      thd->binlog_flush_pending_rows_event(true);
492
    else
493
    {
494
      /*
495
        As already explained above, we need to call end_io_cache() or the last
496
        block will be logged only after Execute_load_query_log_event (which is
497
        wrong), when read_info is destroyed.
498
      */
499
      read_info.end_io_cache();
500
      if (lf_info.wrote_create_file)
501
      {
502
        write_execute_load_query_log_event(thd, handle_duplicates, ignore,
503
                                           transactional_table,killed_status);
504
      }
505
    }
506
  }
507
508
  /* ok to client sent only after binlog write and engine commit */
509
  my_ok(thd, info.copied + info.deleted, 0L, name);
510
err:
511
  DBUG_ASSERT(transactional_table || !(info.copied || info.deleted) ||
512
              thd->transaction.stmt.modified_non_trans_table);
513
  table->file->ha_release_auto_increment();
514
  table->auto_increment_field_not_null= FALSE;
515
  thd->abort_on_warning= 0;
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
516
  return(error);
1 by brian
clean slate
517
}
518
519
520
/* Not a very useful function; just to avoid duplication of code */
521
static bool write_execute_load_query_log_event(THD *thd,
522
					       bool duplicates, bool ignore,
523
					       bool transactional_table,
524
                                               THD::killed_state killed_err_arg)
525
{
526
  Execute_load_query_log_event
527
    e(thd, thd->query, thd->query_length,
528
      (char*)thd->lex->fname_start - (char*)thd->query,
529
      (char*)thd->lex->fname_end - (char*)thd->query,
530
      (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE :
531
      (ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR),
532
      transactional_table, FALSE, killed_err_arg);
533
  e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
534
  return mysql_bin_log.write(&e);
535
}
536
537
538
/****************************************************************************
539
** Read of rows of fixed size + optional garage + optonal newline
540
****************************************************************************/
541
542
static int
543
read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
544
                  List<Item> &fields_vars, List<Item> &set_fields,
545
                  List<Item> &set_values, READ_INFO &read_info,
546
                  ulong skip_lines, bool ignore_check_option_errors)
547
{
548
  List_iterator_fast<Item> it(fields_vars);
549
  Item_field *sql_field;
550
  TABLE *table= table_list->table;
551
  ulonglong id;
552
  bool err;
553
554
  id= 0;
555
 
556
  while (!read_info.read_fixed_length())
557
  {
558
    if (thd->killed)
559
    {
560
      thd->send_kill_message();
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
561
      return(1);
1 by brian
clean slate
562
    }
563
    if (skip_lines)
564
    {
565
      /*
566
	We could implement this with a simple seek if:
567
	- We are not using DATA INFILE LOCAL
568
	- escape character is  ""
569
	- line starting prefix is ""
570
      */
571
      skip_lines--;
572
      continue;
573
    }
574
    it.rewind();
575
    uchar *pos=read_info.row_start;
576
#ifdef HAVE_purify
577
    read_info.row_end[0]=0;
578
#endif
579
580
    restore_record(table, s->default_values);
581
    /*
582
      There is no variables in fields_vars list in this format so
583
      this conversion is safe.
584
    */
585
    while ((sql_field= (Item_field*) it++))
586
    {
587
      Field *field= sql_field->field;                  
588
      if (field == table->next_number_field)
589
        table->auto_increment_field_not_null= TRUE;
590
      /*
591
        No fields specified in fields_vars list can be null in this format.
592
        Mark field as not null, we should do this for each row because of
593
        restore_record...
594
      */
595
      field->set_notnull();
596
597
      if (pos == read_info.row_end)
598
      {
599
        thd->cuted_fields++;			/* Not enough fields */
600
        push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 
601
                            ER_WARN_TOO_FEW_RECORDS, 
602
                            ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
603
        if (!field->maybe_null() && field->type() == FIELD_TYPE_TIMESTAMP)
604
            ((Field_timestamp*) field)->set_time();
605
      }
606
      else
607
      {
608
	uint length;
609
	uchar save_chr;
610
	if ((length=(uint) (read_info.row_end-pos)) >
611
	    field->field_length)
612
	  length=field->field_length;
613
	save_chr=pos[length]; pos[length]='\0'; // Safeguard aganst malloc
614
        field->store((char*) pos,length,read_info.read_charset);
615
	pos[length]=save_chr;
616
	if ((pos+=length) > read_info.row_end)
617
	  pos= read_info.row_end;	/* Fills rest with space */
618
      }
619
    }
620
    if (pos != read_info.row_end)
621
    {
622
      thd->cuted_fields++;			/* To long row */
623
      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 
624
                          ER_WARN_TOO_MANY_RECORDS, 
625
                          ER(ER_WARN_TOO_MANY_RECORDS), thd->row_count); 
626
    }
627
628
    if (thd->killed ||
629
        fill_record(thd, set_fields, set_values,
630
                    ignore_check_option_errors))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
631
      return(1);
1 by brian
clean slate
632
633
    err= write_record(thd, table, &info);
634
    table->auto_increment_field_not_null= FALSE;
635
    if (err)
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
636
      return(1);
1 by brian
clean slate
637
   
638
    /*
639
      We don't need to reset auto-increment field since we are restoring
640
      its default value at the beginning of each loop iteration.
641
    */
642
    if (read_info.next_line())			// Skip to next line
643
      break;
644
    if (read_info.line_cuted)
645
    {
646
      thd->cuted_fields++;			/* To long row */
647
      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 
648
                          ER_WARN_TOO_MANY_RECORDS, 
649
                          ER(ER_WARN_TOO_MANY_RECORDS), thd->row_count); 
650
    }
651
    thd->row_count++;
652
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
653
  return(test(read_info.error));
1 by brian
clean slate
654
}
655
656
657
658
static int
659
read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
660
               List<Item> &fields_vars, List<Item> &set_fields,
661
               List<Item> &set_values, READ_INFO &read_info,
662
	       String &enclosed, ulong skip_lines,
663
	       bool ignore_check_option_errors)
664
{
665
  List_iterator_fast<Item> it(fields_vars);
666
  Item *item;
667
  TABLE *table= table_list->table;
668
  uint enclosed_length;
669
  ulonglong id;
670
  bool err;
671
672
  enclosed_length=enclosed.length();
673
  id= 0;
674
675
  for (;;it.rewind())
676
  {
677
    if (thd->killed)
678
    {
679
      thd->send_kill_message();
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
680
      return(1);
1 by brian
clean slate
681
    }
682
683
    restore_record(table, s->default_values);
684
685
    while ((item= it++))
686
    {
687
      uint length;
688
      uchar *pos;
689
      Item *real_item;
690
691
      if (read_info.read_field())
692
	break;
693
694
      /* If this line is to be skipped we don't want to fill field or var */
695
      if (skip_lines)
696
        continue;
697
698
      pos=read_info.row_start;
699
      length=(uint) (read_info.row_end-pos);
700
701
      real_item= item->real_item();
702
703
      if ((!read_info.enclosed && (enclosed_length && length == 4 && !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
704
	  (length == 1 && read_info.found_null))
705
      {
706
707
        if (real_item->type() == Item::FIELD_ITEM)
708
        {
709
          Field *field= ((Item_field *)real_item)->field;
710
          if (field->reset())
711
          {
712
            my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0), field->field_name,
713
                     thd->row_count);
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
714
            return(1);
1 by brian
clean slate
715
          }
716
          field->set_null();
717
          if (!field->maybe_null())
718
          {
719
            if (field->type() == MYSQL_TYPE_TIMESTAMP)
720
              ((Field_timestamp*) field)->set_time();
721
            else if (field != table->next_number_field)
722
              field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
723
                                 ER_WARN_NULL_TO_NOTNULL, 1);
724
          }
725
	}
726
        else if (item->type() == Item::STRING_ITEM)
727
        {
728
          ((Item_user_var_as_out_param *)item)->set_null_value(
729
                                                  read_info.read_charset);
730
        }
731
        else
732
        {
733
          my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
734
          return(1);
1 by brian
clean slate
735
        }
736
737
	continue;
738
      }
739
740
      if (real_item->type() == Item::FIELD_ITEM)
741
      {
742
        Field *field= ((Item_field *)real_item)->field;
743
        field->set_notnull();
744
        read_info.row_end[0]=0;			// Safe to change end marker
745
        if (field == table->next_number_field)
746
          table->auto_increment_field_not_null= TRUE;
747
        field->store((char*) pos, length, read_info.read_charset);
748
      }
749
      else if (item->type() == Item::STRING_ITEM)
750
      {
751
        ((Item_user_var_as_out_param *)item)->set_value((char*) pos, length,
752
                                                        read_info.read_charset);
753
      }
754
      else
755
      {
756
        my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
757
        return(1);
1 by brian
clean slate
758
      }
759
    }
760
    if (read_info.error)
761
      break;
762
    if (skip_lines)
763
    {
764
      skip_lines--;
765
      continue;
766
    }
767
    if (item)
768
    {
769
      /* Have not read any field, thus input file is simply ended */
770
      if (item == fields_vars.head())
771
	break;
772
      for (; item ; item= it++)
773
      {
774
        Item *real_item= item->real_item();
775
        if (real_item->type() == Item::FIELD_ITEM)
776
        {
777
          Field *field= ((Item_field *)real_item)->field;
778
          if (field->reset())
779
          {
780
            my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0),field->field_name,
781
                     thd->row_count);
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
782
            return(1);
1 by brian
clean slate
783
          }
784
          if (!field->maybe_null() && field->type() == FIELD_TYPE_TIMESTAMP)
785
              ((Field_timestamp*) field)->set_time();
786
          /*
787
            QQ: We probably should not throw warning for each field.
788
            But how about intention to always have the same number
789
            of warnings in THD::cuted_fields (and get rid of cuted_fields
790
            in the end ?)
791
          */
792
          thd->cuted_fields++;
793
          push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
794
                              ER_WARN_TOO_FEW_RECORDS,
795
                              ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
796
        }
797
        else if (item->type() == Item::STRING_ITEM)
798
        {
799
          ((Item_user_var_as_out_param *)item)->set_null_value(
800
                                                  read_info.read_charset);
801
        }
802
        else
803
        {
804
          my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
805
          return(1);
1 by brian
clean slate
806
        }
807
      }
808
    }
809
810
    if (thd->killed ||
811
        fill_record(thd, set_fields, set_values,
812
                    ignore_check_option_errors))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
813
      return(1);
1 by brian
clean slate
814
815
    err= write_record(thd, table, &info);
816
    table->auto_increment_field_not_null= FALSE;
817
    if (err)
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
818
      return(1);
1 by brian
clean slate
819
    /*
820
      We don't need to reset auto-increment field since we are restoring
821
      its default value at the beginning of each loop iteration.
822
    */
823
    if (read_info.next_line())			// Skip to next line
824
      break;
825
    if (read_info.line_cuted)
826
    {
827
      thd->cuted_fields++;			/* To long row */
828
      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 
829
                          ER_WARN_TOO_MANY_RECORDS, ER(ER_WARN_TOO_MANY_RECORDS), 
830
                          thd->row_count);   
831
      if (thd->killed)
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
832
        return(1);
1 by brian
clean slate
833
    }
834
    thd->row_count++;
835
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
836
  return(test(read_info.error));
1 by brian
clean slate
837
}
838
839
840
/****************************************************************************
841
** Read rows in xml format
842
****************************************************************************/
843
static int
844
read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
845
               List<Item> &fields_vars, List<Item> &set_fields,
846
               List<Item> &set_values, READ_INFO &read_info,
847
               String &row_tag, ulong skip_lines,
848
               bool ignore_check_option_errors)
849
{
850
  List_iterator_fast<Item> it(fields_vars);
851
  Item *item;
852
  TABLE *table= table_list->table;
853
  bool no_trans_update_stmt;
854
  CHARSET_INFO *cs= read_info.read_charset;
855
  
856
  no_trans_update_stmt= !table->file->has_transactions();
857
  
858
  for ( ; ; it.rewind())
859
  {
860
    if (thd->killed)
861
    {
862
      thd->send_kill_message();
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
863
      return(1);
1 by brian
clean slate
864
    }
865
    
866
    // read row tag and save values into tag list
867
    if (read_info.read_xml())
868
      break;
869
    
870
    List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
871
    xmlit.rewind();
872
    XML_TAG *tag= NULL;
873
    
874
    
875
    restore_record(table, s->default_values);
876
    
877
    while ((item= it++))
878
    {
879
      /* If this line is to be skipped we don't want to fill field or var */
880
      if (skip_lines)
881
        continue;
882
      
883
      /* find field in tag list */
884
      xmlit.rewind();
885
      tag= xmlit++;
886
      
887
      while(tag && strcmp(tag->field.c_ptr(), item->name) != 0)
888
        tag= xmlit++;
889
      
890
      if (!tag) // found null
891
      {
892
        if (item->type() == Item::FIELD_ITEM)
893
        {
894
          Field *field= ((Item_field *) item)->field;
895
          field->reset();
896
          field->set_null();
897
          if (field == table->next_number_field)
898
            table->auto_increment_field_not_null= TRUE;
899
          if (!field->maybe_null())
900
          {
901
            if (field->type() == FIELD_TYPE_TIMESTAMP)
902
              ((Field_timestamp *) field)->set_time();
903
            else if (field != table->next_number_field)
904
              field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
905
                                 ER_WARN_NULL_TO_NOTNULL, 1);
906
          }
907
        }
908
        else
909
          ((Item_user_var_as_out_param *) item)->set_null_value(cs);
910
        continue;
911
      }
912
913
      if (item->type() == Item::FIELD_ITEM)
914
      {
915
916
        Field *field= ((Item_field *)item)->field;
917
        field->set_notnull();
918
        if (field == table->next_number_field)
919
          table->auto_increment_field_not_null= TRUE;
920
        field->store((char *) tag->value.ptr(), tag->value.length(), cs);
921
      }
922
      else
923
        ((Item_user_var_as_out_param *) item)->set_value(
924
                                                 (char *) tag->value.ptr(), 
925
                                                 tag->value.length(), cs);
926
    }
927
    
928
    if (read_info.error)
929
      break;
930
    
931
    if (skip_lines)
932
    {
933
      skip_lines--;
934
      continue;
935
    }
936
    
937
    if (item)
938
    {
939
      /* Have not read any field, thus input file is simply ended */
940
      if (item == fields_vars.head())
941
        break;
942
      
943
      for ( ; item; item= it++)
944
      {
945
        if (item->type() == Item::FIELD_ITEM)
946
        {
947
          /*
948
            QQ: We probably should not throw warning for each field.
949
            But how about intention to always have the same number
950
            of warnings in THD::cuted_fields (and get rid of cuted_fields
951
            in the end ?)
952
          */
953
          thd->cuted_fields++;
954
          push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
955
                              ER_WARN_TOO_FEW_RECORDS,
956
                              ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
957
        }
958
        else
959
          ((Item_user_var_as_out_param *)item)->set_null_value(cs);
960
      }
961
    }
962
963
    if (thd->killed || fill_record(thd, set_fields, set_values,
964
                    ignore_check_option_errors))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
965
      return(1);
1 by brian
clean slate
966
967
    if (write_record(thd, table, &info))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
968
      return(1);
1 by brian
clean slate
969
    
970
    /*
971
      We don't need to reset auto-increment field since we are restoring
972
      its default value at the beginning of each loop iteration.
973
    */
974
    thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt;
975
    thd->row_count++;
976
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
977
  return(test(read_info.error));
1 by brian
clean slate
978
} /* load xml end */
979
980
981
/* Unescape all escape characters, mark \N as null */
982
983
char
984
READ_INFO::unescape(char chr)
985
{
986
  /* keep this switch synchornous with the ESCAPE_CHARS macro */
987
  switch(chr) {
988
  case 'n': return '\n';
989
  case 't': return '\t';
990
  case 'r': return '\r';
991
  case 'b': return '\b';
992
  case '0': return 0;				// Ascii null
993
  case 'Z': return '\032';			// Win32 end of file
994
  case 'N': found_null=1;
995
996
    /* fall through */
997
  default:  return chr;
998
  }
999
}
1000
1001
1002
/*
1003
  Read a line using buffering
1004
  If last line is empty (in line mode) then it isn't outputed
1005
*/
1006
1007
1008
READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
1009
		     String &field_term, String &line_start, String &line_term,
1010
		     String &enclosed_par, int escape, bool get_it_from_net,
1011
		     bool is_fifo)
1012
  :file(file_par),escape_char(escape)
1013
{
1014
  read_charset= cs;
1015
  field_term_ptr=(char*) field_term.ptr();
1016
  field_term_length= field_term.length();
1017
  line_term_ptr=(char*) line_term.ptr();
1018
  line_term_length= line_term.length();
1019
  level= 0; /* for load xml */
1020
  if (line_start.length() == 0)
1021
  {
1022
    line_start_ptr=0;
1023
    start_of_line= 0;
1024
  }
1025
  else
1026
  {
1027
    line_start_ptr=(char*) line_start.ptr();
1028
    line_start_end=line_start_ptr+line_start.length();
1029
    start_of_line= 1;
1030
  }
1031
  /* If field_terminator == line_terminator, don't use line_terminator */
1032
  if (field_term_length == line_term_length &&
1033
      !memcmp(field_term_ptr,line_term_ptr,field_term_length))
1034
  {
1035
    line_term_length=0;
1036
    line_term_ptr=(char*) "";
1037
  }
1038
  enclosed_char= (enclosed_length=enclosed_par.length()) ?
1039
    (uchar) enclosed_par[0] : INT_MAX;
1040
  field_term_char= field_term_length ? (uchar) field_term_ptr[0] : INT_MAX;
1041
  line_term_char= line_term_length ? (uchar) line_term_ptr[0] : INT_MAX;
1042
  error=eof=found_end_of_line=found_null=line_cuted=0;
1043
  buff_length=tot_length;
1044
1045
1046
  /* Set of a stack for unget if long terminators */
1047
  uint length=max(field_term_length,line_term_length)+1;
1048
  set_if_bigger(length,line_start.length());
1049
  stack=stack_pos=(int*) sql_alloc(sizeof(int)*length);
1050
1051
  if (!(buffer=(uchar*) my_malloc(buff_length+1,MYF(0))))
1052
    error=1; /* purecov: inspected */
1053
  else
1054
  {
1055
    end_of_buff=buffer+buff_length;
1056
    if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0,
1057
		      (get_it_from_net) ? READ_NET :
1058
		      (is_fifo ? READ_FIFO : READ_CACHE),0L,1,
1059
		      MYF(MY_WME)))
1060
    {
1061
      my_free((uchar*) buffer,MYF(0)); /* purecov: inspected */
1062
      error=1;
1063
    }
1064
    else
1065
    {
1066
      /*
1067
	init_io_cache() will not initialize read_function member
1068
	if the cache is READ_NET. So we work around the problem with a
1069
	manual assignment
1070
      */
1071
      need_end_io_cache = 1;
1072
1073
      if (get_it_from_net)
1074
	cache.read_function = _my_b_net_read;
1075
1076
      if (mysql_bin_log.is_open())
1077
	cache.pre_read = cache.pre_close =
1078
	  (IO_CACHE_CALLBACK) log_loaded_block;
1079
    }
1080
  }
1081
}
1082
1083
1084
READ_INFO::~READ_INFO()
1085
{
1086
  if (!error)
1087
  {
1088
    if (need_end_io_cache)
1089
      ::end_io_cache(&cache);
1090
    my_free((uchar*) buffer,MYF(0));
1091
    error=1;
1092
  }
1093
  List_iterator<XML_TAG> xmlit(taglist);
1094
  XML_TAG *t;
1095
  while ((t= xmlit++))
1096
    delete(t);
1097
}
1098
1099
1100
/*
  Character input with push-back, for use inside READ_INFO methods.

  GET     pops the most recently pushed value if the unget stack is not
          empty, otherwise reads the next byte from the IO cache.
  PUSH(A) stores A on the unget stack.  The expansion is parenthesized so
          that it can be used safely inside a larger expression.
*/
#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache))
#define PUSH(A) (*(stack_pos++)= (A))
1102
1103
1104
inline int READ_INFO::terminator(char *ptr,uint length)
1105
{
1106
  int chr=0;					// Keep gcc happy
1107
  uint i;
1108
  for (i=1 ; i < length ; i++)
1109
  {
1110
    if ((chr=GET) != *++ptr)
1111
    {
1112
      break;
1113
    }
1114
  }
1115
  if (i == length)
1116
    return 1;
1117
  PUSH(chr);
1118
  while (i-- > 1)
1119
    PUSH((uchar) *--ptr);
1120
  return 0;
1121
}
1122
1123
1124
/*
  Read the next field of a delimited text row into 'buffer'.

  On success row_start/row_end delimit the field data inside 'buffer' and
  'enclosed' tells whether the field ended as an enclosed field.
  found_null is reset on entry; unescape() raises it when it sees \N.
  found_end_of_line is set when the field was ended by a line terminator
  or by end of file.

  RETURN
    0  a field was read
    1  no field: found_end_of_line was already set, line_start was not
       found, the input was at end of file, or the buffer could not be
       enlarged (in that case 'error' is set as well)
*/
int READ_INFO::read_field()
1125
{
1126
  int chr,found_enclosed_char;
1127
  uchar *to,*new_buffer;
1128
1129
  found_null=0;
1130
  if (found_end_of_line)
1131
    return 1;					// One have to call next_line
1132
1133
  /* Skip until we find 'line_start' */
1134
1135
  if (start_of_line)
1136
  {						// Skip until line_start
1137
    start_of_line=0;
1138
    if (find_start_of_fields())
1139
      return 1;
1140
  }
1141
  if ((chr=GET) == my_b_EOF)
1142
  {
1143
    found_end_of_line=eof=1;
1144
    return 1;
1145
  }
1146
  to=buffer;
  /*
    found_enclosed_char is the enclosing character when the field starts
    with it, else INT_MAX, which no input character can equal.
  */
1147
  if (chr == enclosed_char)
1148
  {
1149
    found_enclosed_char=enclosed_char;
1150
    *to++=(uchar) chr;				// If error
1151
  }
1152
  else
1153
  {
1154
    found_enclosed_char= INT_MAX;
1155
    PUSH(chr);
1156
  }
1157
1158
  /* Outer loop: one pass per buffer size; inner loop: one input character */
  for (;;)
1159
  {
1160
    while ( to < end_of_buff)
1161
    {
1162
      chr = GET;
1163
#ifdef USE_MB
      /*
        Copy a multi-byte character as a unit when it fits in the buffer.
        If the bytes do not form a valid character they are pushed back
        and the lead byte is then handled like a single-byte character.
      */
1164
      if ((my_mbcharlen(read_charset, chr) > 1) &&
1165
          to+my_mbcharlen(read_charset, chr) <= end_of_buff)
1166
      {
1167
	  uchar* p = (uchar*)to;
1168
	  *to++ = chr;
1169
	  int ml = my_mbcharlen(read_charset, chr);
1170
	  int i;
1171
	  for (i=1; i<ml; i++) {
1172
	      chr = GET;
1173
	      if (chr == my_b_EOF)
1174
		  goto found_eof;
1175
	      *to++ = chr;
1176
	  }
1177
	  if (my_ismbchar(read_charset,
1178
                          (const char *)p,
1179
                          (const char *)to))
1180
	    continue;
1181
	  for (i=0; i<ml; i++)
1182
	    PUSH((uchar) *--to);
1183
	  chr = GET;
1184
      }
1185
#endif
1186
      if (chr == my_b_EOF)
1187
	goto found_eof;
1188
      if (chr == escape_char)
1189
      {
1190
	if ((chr=GET) == my_b_EOF)
1191
	{
1192
	  *to++= (uchar) escape_char;
1193
	  goto found_eof;
1194
	}
1195
        /*
1196
          When escape_char == enclosed_char, we treat it like we do for
1197
          handling quotes in SQL parsing -- you can double-up the
1198
          escape_char to include it literally, but it doesn't do escapes
1199
          like \n. This allows: LOAD DATA ... ENCLOSED BY '"' ESCAPED BY '"'
1200
          with data like: "fie""ld1", "field2"
1201
         */
1202
        if (escape_char != enclosed_char || chr == escape_char)
1203
        {
1204
          *to++ = (uchar) unescape((char) chr);
1205
          continue;
1206
        }
        /*
          Not an escape sequence after all: put the following character
          back and process escape_char itself as an ordinary character.
        */
1207
        PUSH(chr);
1208
        chr= escape_char;
1209
      }
      /*
        Unless ALLOW_LINESEPARATOR_IN_STRINGS is defined, a line terminator
        only ends the row when the field did not start with the enclosing
        character.
      */
1210
#ifdef ALLOW_LINESEPARATOR_IN_STRINGS
1211
      if (chr == line_term_char)
1212
#else
1213
      if (chr == line_term_char && found_enclosed_char == INT_MAX)
1214
#endif
1215
      {
1216
	if (terminator(line_term_ptr,line_term_length))
1217
	{					// Maybe unexpected linefeed
1218
	  enclosed=0;
1219
	  found_end_of_line=1;
1220
	  row_start=buffer;
1221
	  row_end=  to;
1222
	  return 0;
1223
	}
1224
      }
1225
      if (chr == found_enclosed_char)
1226
      {
1227
	if ((chr=GET) == found_enclosed_char)
1228
	{					// Remove dupplicated
1229
	  *to++ = (uchar) chr;
1230
	  continue;
1231
	}
1232
	// End of enclosed field if followed by field_term or line_term
1233
	if (chr == my_b_EOF ||
1234
	    (chr == line_term_char && terminator(line_term_ptr, line_term_length)))
1235
	{					// Maybe unexpected linefeed
1236
	  enclosed=1;
1237
	  found_end_of_line=1;
	  /* buffer[0] holds the opening enclosing character; skip it */
1238
	  row_start=buffer+1;
1239
	  row_end=  to;
1240
	  return 0;
1241
	}
1242
	if (chr == field_term_char &&
1243
	    terminator(field_term_ptr,field_term_length))
1244
	{
1245
	  enclosed=1;
1246
	  row_start=buffer+1;
1247
	  row_end=  to;
1248
	  return 0;
1249
	}
1250
	/*
1251
	  The string didn't terminate yet.
1252
	  Store back next character for the loop
1253
	*/
1254
	PUSH(chr);
1255
	/* copy the found term character to 'to' */
1256
	chr= found_enclosed_char;
1257
      }
1258
      else if (chr == field_term_char && found_enclosed_char == INT_MAX)
1259
      {
1260
	if (terminator(field_term_ptr,field_term_length))
1261
	{
1262
	  enclosed=0;
1263
	  row_start=buffer;
1264
	  row_end=  to;
1265
	  return 0;
1266
	}
1267
      }
1268
      *to++ = (uchar) chr;
1269
    }
1270
    /*
1271
    ** We come here if buffer is too small. Enlarge it and continue
1272
    */
1273
    if (!(new_buffer=(uchar*) my_realloc((char*) buffer,buff_length+1+IO_SIZE,
1274
					MYF(MY_WME))))
1275
      return (error=1);
    /* Keep the write position at the same offset inside the new buffer */
1276
    to=new_buffer + (to-buffer);
1277
    buffer=new_buffer;
1278
    buff_length+=IO_SIZE;
1279
    end_of_buff=buffer+buff_length;
1280
  }
1281
1282
  /*
    End of file inside a field: return what was collected so far as a
    non-enclosed field starting at the beginning of the buffer.
  */
found_eof:
1283
  enclosed=0;
1284
  found_end_of_line=eof=1;
1285
  row_start=buffer;
1286
  row_end=to;
1287
  return 0;
1288
}
1289
1290
/*
1291
  Read a row with fixed length.
1292
1293
  NOTES
1294
    The row may not be fixed size on disk if there are escape
1295
    characters in the file.
1296
1297
  IMPLEMENTATION NOTE
1298
    One can't use fixed length with multi-byte charset **
1299
1300
  RETURN
1301
    0  ok
1302
    1  error
1303
*/
1304
1305
int READ_INFO::read_fixed_length()
1306
{
1307
  int chr;
1308
  uchar *to;
1309
  if (found_end_of_line)
1310
    return 1;					// One have to call next_line
1311
1312
  if (start_of_line)
1313
  {						// Skip until line_start
1314
    start_of_line=0;
1315
    if (find_start_of_fields())
1316
      return 1;
1317
  }
1318
1319
  to=row_start=buffer;
1320
  while (to < end_of_buff)
1321
  {
1322
    if ((chr=GET) == my_b_EOF)
1323
      goto found_eof;
1324
    if (chr == escape_char)
1325
    {
1326
      if ((chr=GET) == my_b_EOF)
1327
      {
1328
	*to++= (uchar) escape_char;
1329
	goto found_eof;
1330
      }
1331
      *to++ =(uchar) unescape((char) chr);
1332
      continue;
1333
    }
1334
    if (chr == line_term_char)
1335
    {
1336
      if (terminator(line_term_ptr,line_term_length))
1337
      {						// Maybe unexpected linefeed
1338
	found_end_of_line=1;
1339
	row_end=  to;
1340
	return 0;
1341
      }
1342
    }
1343
    *to++ = (uchar) chr;
1344
  }
1345
  row_end=to;					// Found full line
1346
  return 0;
1347
1348
found_eof:
1349
  found_end_of_line=eof=1;
1350
  row_start=buffer;
1351
  row_end=to;
1352
  return to == buffer ? 1 : 0;
1353
}
1354
1355
1356
int READ_INFO::next_line()
1357
{
1358
  line_cuted=0;
1359
  start_of_line= line_start_ptr != 0;
1360
  if (found_end_of_line || eof)
1361
  {
1362
    found_end_of_line=0;
1363
    return eof;
1364
  }
1365
  found_end_of_line=0;
1366
  if (!line_term_length)
1367
    return 0;					// No lines
1368
  for (;;)
1369
  {
1370
    int chr = GET;
1371
#ifdef USE_MB
1372
   if (my_mbcharlen(read_charset, chr) > 1)
1373
   {
1374
       for (uint i=1;
1375
            chr != my_b_EOF && i<my_mbcharlen(read_charset, chr);
1376
            i++)
1377
	   chr = GET;
1378
       if (chr == escape_char)
1379
	   continue;
1380
   }
1381
#endif
1382
   if (chr == my_b_EOF)
1383
   {
1384
      eof=1;
1385
      return 1;
1386
    }
1387
    if (chr == escape_char)
1388
    {
1389
      line_cuted=1;
1390
      if (GET == my_b_EOF)
1391
	return 1;
1392
      continue;
1393
    }
1394
    if (chr == line_term_char && terminator(line_term_ptr,line_term_length))
1395
      return 0;
1396
    line_cuted=1;
1397
  }
1398
}
1399
1400
1401
bool READ_INFO::find_start_of_fields()
1402
{
1403
  int chr;
1404
 try_again:
1405
  do
1406
  {
1407
    if ((chr=GET) == my_b_EOF)
1408
    {
1409
      found_end_of_line=eof=1;
1410
      return 1;
1411
    }
1412
  } while ((char) chr != line_start_ptr[0]);
1413
  for (char *ptr=line_start_ptr+1 ; ptr != line_start_end ; ptr++)
1414
  {
1415
    chr=GET;					// Eof will be checked later
1416
    if ((char) chr != *ptr)
1417
    {						// Can't be line_start
1418
      PUSH(chr);
1419
      while (--ptr != line_start_ptr)
1420
      {						// Restart with next char
1421
	PUSH((uchar) *ptr);
1422
      }
1423
      goto try_again;
1424
    }
1425
  }
1426
  return 0;
1427
}
1428
1429
1430
/*
1431
  Clear taglist from tags with a specified level
1432
*/
1433
int READ_INFO::clear_level(int level)
1434
{
1435
  List_iterator<XML_TAG> xmlit(taglist);
1436
  xmlit.rewind();
1437
  XML_TAG *tag;
1438
  
1439
  while ((tag= xmlit++))
1440
  {
1441
     if(tag->level >= level)
1442
     {
1443
       xmlit.remove();
1444
       delete tag;
1445
     }
1446
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1447
  return(0);
1 by brian
clean slate
1448
}
1449
1450
1451
/*
  Convert the name of a predefined XML entity (the text between '&' and
  ';') to the character it stands for.

  Recognized names: gt, lt, amp, quot, apos.  'name' does not have to be
  NUL-terminated; exactly 'length' bytes are compared.

  Return -1 if the name is not recognized.
*/
static int
my_xml_entity_to_char(const char *name, uint length)
{
  static const struct
  {
    const char *text;
    uint text_length;
    int chr;
  } known_entities[]=
  {
    { "gt",   2, '>'  },
    { "lt",   2, '<'  },
    { "amp",  3, '&'  },
    { "quot", 4, '"'  },
    { "apos", 4, '\'' }
  };
  const uint count= sizeof(known_entities) / sizeof(known_entities[0]);

  for (uint idx= 0; idx < count; idx++)
  {
    if (length == known_entities[idx].text_length &&
        !memcmp(name, known_entities[idx].text, length))
      return known_entities[idx].chr;
  }
  return -1;
}
1479
1480
1481
/**
  @brief Convert carriage return, line feed and tab to space

  @param chr    character

  @return ' ' if chr is a tab, carriage return or line feed,
          otherwise chr unchanged

  @details According to the "XML 1.0" standard,
           only space (#x20) characters, carriage returns,
           line feeds or tabs are considered as spaces.
           Convert all of them to space (#x20) for parsing simplicity.
*/
static int
my_tospace(int chr)
{
  switch (chr) {
  case '\t':
  case '\r':
  case '\n':
    return ' ';
  default:
    return chr;
  }
}
1496
1497
1498
/*
1499
  Read an xml value: handle multibyte and xml escape
1500
*/
1501
int READ_INFO::read_value(int delim, String *val)
1502
{
1503
  int chr;
1504
  String tmp;
1505
1506
  for (chr= my_tospace(GET); chr != delim && chr != my_b_EOF; )
1507
  {
1508
#ifdef USE_MB
1509
    if (my_mbcharlen(read_charset, chr) > 1)
1510
    {
1511
      int i, ml= my_mbcharlen(read_charset, chr);
1512
      for (i= 1; i < ml; i++) 
1513
      {
1514
        val->append(chr);
1515
        /*
1516
          Don't use my_tospace() in the middle of a multi-byte character
1517
          TODO: check that the multi-byte sequence is valid.
1518
        */
1519
        chr= GET; 
1520
        if (chr == my_b_EOF)
1521
          return chr;
1522
      }
1523
    }
1524
#endif
1525
    if(chr == '&')
1526
    {
1527
      tmp.length(0);
1528
      for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET))
1529
      {
1530
        if (chr == my_b_EOF)
1531
          return chr;
1532
        tmp.append(chr);
1533
      }
1534
      if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
1535
        val->append(chr);
1536
      else
1537
      {
1538
        val->append('&');
1539
        val->append(tmp);
1540
        val->append(';'); 
1541
      }
1542
    }
1543
    else
1544
      val->append(chr);
1545
    chr= my_tospace(GET);
1546
  }            
1547
  return chr;
1548
}
1549
1550
1551
/*
1552
  Read a record in xml format
1553
  tags and attributes are stored in taglist
1554
  when tag set in ROWS IDENTIFIED BY is closed, we are ready and return
1555
*/
1556
int READ_INFO::read_xml()
1557
{
1558
  int chr, chr2, chr3;
1559
  int delim= 0;
1560
  String tag, attribute, value;
1561
  bool in_tag= false;
1562
  
1563
  tag.length(0);
1564
  attribute.length(0);
1565
  value.length(0);
1566
  
1567
  for (chr= my_tospace(GET); chr != my_b_EOF ; )
1568
  {
1569
    switch(chr){
1570
    case '<':  /* read tag */
1571
        /* TODO: check if this is a comment <!-- comment -->  */
1572
      chr= my_tospace(GET);
1573
      if(chr == '!')
1574
      {
1575
        chr2= GET;
1576
        chr3= GET;
1577
        
1578
        if(chr2 == '-' && chr3 == '-')
1579
        {
1580
          chr2= 0;
1581
          chr3= 0;
1582
          chr= my_tospace(GET);
1583
          
1584
          while(chr != '>' || chr2 != '-' || chr3 != '-')
1585
          {
1586
            if(chr == '-')
1587
            {
1588
              chr3= chr2;
1589
              chr2= chr;
1590
            }
1591
            else if (chr2 == '-')
1592
            {
1593
              chr2= 0;
1594
              chr3= 0;
1595
            }
1596
            chr= my_tospace(GET);
1597
            if (chr == my_b_EOF)
1598
              goto found_eof;
1599
          }
1600
          break;
1601
        }
1602
      }
1603
      
1604
      tag.length(0);
1605
      while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF)
1606
      {
1607
        if(chr != delim) /* fix for the '<field name =' format */
1608
          tag.append(chr);
1609
        chr= my_tospace(GET);
1610
      }
1611
      
1612
      if(chr == ' ' || chr == '>')
1613
      {
1614
        level++;
1615
        clear_level(level + 1);
1616
      }
1617
      
1618
      if (chr == ' ')
1619
        in_tag= true;
1620
      else 
1621
        in_tag= false;
1622
      break;
1623
      
1624
    case ' ': /* read attribute */
1625
      while(chr == ' ')  /* skip blanks */
1626
        chr= my_tospace(GET);
1627
      
1628
      if(!in_tag)
1629
        break;
1630
      
1631
      while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF)
1632
      {
1633
        attribute.append(chr);
1634
        chr= my_tospace(GET);
1635
      }
1636
      break;
1637
      
1638
    case '>': /* end tag - read tag value */
1639
      in_tag= false;
1640
      chr= read_value('<', &value);
1641
      if(chr == my_b_EOF)
1642
        goto found_eof;
1643
      
1644
      /* save value to list */
1645
      if(tag.length() > 0 && value.length() > 0)
1646
        taglist.push_front( new XML_TAG(level, tag, value));
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1647
1 by brian
clean slate
1648
      tag.length(0);
1649
      value.length(0);
1650
      attribute.length(0);
1651
      break;
1652
      
1653
    case '/': /* close tag */
1654
      level--;
1655
      chr= my_tospace(GET);
1656
      if(chr != '>')   /* if this is an empty tag <tag   /> */
1657
        tag.length(0); /* we should keep tag value          */
1658
      while(chr != '>' && chr != my_b_EOF)
1659
      {
1660
        tag.append(chr);
1661
        chr= my_tospace(GET);
1662
      }
1663
      
1664
      if((tag.length() == line_term_length -2) &&
1665
         (strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1666
         return(0); //normal return
1667
1 by brian
clean slate
1668
      chr= my_tospace(GET);
1669
      break;   
1670
      
1671
    case '=': /* attribute name end - read the value */
1672
      //check for tag field and attribute name
1673
      if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
1674
         !memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name")))
1675
      {
1676
        /*
1677
          this is format <field name="xx">xx</field>
1678
          where actual fieldname is in attribute
1679
        */
1680
        delim= my_tospace(GET);
1681
        tag.length(0);
1682
        attribute.length(0);
1683
        chr= '<'; /* we pretend that it is a tag */
1684
        level--;
1685
        break;
1686
      }
1687
      
1688
      //check for " or '
1689
      chr= GET;
1690
      if (chr == my_b_EOF)
1691
        goto found_eof;
1692
      if(chr == '"' || chr == '\'')
1693
      {
1694
        delim= chr;
1695
      }
1696
      else
1697
      {
1698
        delim= ' '; /* no delimiter, use space */
1699
        PUSH(chr);
1700
      }
1701
      
1702
      chr= read_value(delim, &value);
1703
      if(attribute.length() > 0 && value.length() > 0)
1704
        taglist.push_front(new XML_TAG(level + 1, attribute, value));
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1705
1 by brian
clean slate
1706
      attribute.length(0);
1707
      value.length(0);
1708
      if (chr != ' ')
1709
        chr= my_tospace(GET);
1710
      break;
1711
    
1712
    default:
1713
      chr= my_tospace(GET);
1714
    } /* end switch */
1715
  } /* end while */
1716
  
1717
found_eof:
1718
  eof= 1;
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1719
  return(1);
1 by brian
clean slate
1720
}