~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000-2006 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
17
/* Copy data from a textfile to table */
18
/* 2006-12 Erik Wetterberg : LOAD XML added */
19
20
#include "mysql_priv.h"
#include <my_dir.h>
#include <m_ctype.h>
#include "sql_repl.h"
#include <errno.h>
24
25
class XML_TAG {
26
public:
27
  int level;
28
  String field;
29
  String value;
30
  XML_TAG(int l, String f, String v);
31
};
32
33
34
XML_TAG::XML_TAG(int l, String f, String v)
35
{
36
  level= l;
37
  field.append(f);
38
  value.append(v);
39
}
40
41
42
class READ_INFO {
43
  File	file;
44
  uchar	*buffer,			/* Buffer for read text */
45
	*end_of_buff;			/* Data in bufferts ends here */
46
  uint	buff_length,			/* Length of buffert */
47
	max_length;			/* Max length of row */
48
  char	*field_term_ptr,*line_term_ptr,*line_start_ptr,*line_start_end;
49
  uint	field_term_length,line_term_length,enclosed_length;
50
  int	field_term_char,line_term_char,enclosed_char,escape_char;
51
  int	*stack,*stack_pos;
52
  bool	found_end_of_line,start_of_line,eof;
53
  bool  need_end_io_cache;
54
  IO_CACHE cache;
55
  NET *io_net;
56
  int level; /* for load xml */
57
58
public:
59
  bool error,line_cuted,found_null,enclosed;
60
  uchar	*row_start,			/* Found row starts here */
61
	*row_end;			/* Found row ends here */
62
  CHARSET_INFO *read_charset;
63
64
  READ_INFO(File file,uint tot_length,CHARSET_INFO *cs,
65
	    String &field_term,String &line_start,String &line_term,
66
	    String &enclosed,int escape,bool get_it_from_net, bool is_fifo);
67
  ~READ_INFO();
68
  int read_field();
69
  int read_fixed_length(void);
70
  int next_line(void);
71
  char unescape(char chr);
72
  int terminator(char *ptr,uint length);
73
  bool find_start_of_fields();
74
  /* load xml */
75
  List<XML_TAG> taglist;
76
  int read_value(int delim, String *val);
77
  int read_xml();
78
  int clear_level(int level);
79
80
  /*
81
    We need to force cache close before destructor is invoked to log
82
    the last read block
83
  */
84
  void end_io_cache()
85
  {
86
    ::end_io_cache(&cache);
87
    need_end_io_cache = 0;
88
  }
89
90
  /*
91
    Either this method, or we need to make cache public
92
    Arg must be set from mysql_load() since constructor does not see
93
    either the table or THD value
94
  */
95
  void set_io_cache_arg(void* arg) { cache.arg = arg; }
96
};
97
98
static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
99
                             List<Item> &fields_vars, List<Item> &set_fields,
100
                             List<Item> &set_values, READ_INFO &read_info,
101
			     ulong skip_lines,
102
			     bool ignore_check_option_errors);
103
static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
104
                          List<Item> &fields_vars, List<Item> &set_fields,
105
                          List<Item> &set_values, READ_INFO &read_info,
106
			  String &enclosed, ulong skip_lines,
107
			  bool ignore_check_option_errors);
108
109
static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
110
                          List<Item> &fields_vars, List<Item> &set_fields,
111
                          List<Item> &set_values, READ_INFO &read_info,
112
                          String &enclosed, ulong skip_lines,
113
                          bool ignore_check_option_errors);
114
115
static bool write_execute_load_query_log_event(THD *thd,
116
					       bool duplicates, bool ignore,
117
					       bool transactional_table,
118
                                               THD::killed_state killed_status);
119
120
/*
121
  Execute LOAD DATA query
122
123
  SYNOPSYS
124
    mysql_load()
125
      thd - current thread
126
      ex  - sql_exchange object representing source file and its parsing rules
127
      table_list  - list of tables to which we are loading data
128
      fields_vars - list of fields and variables to which we read
129
                    data from file
130
      set_fields  - list of fields mentioned in set clause
131
      set_values  - expressions to assign to fields in previous list
132
      handle_duplicates - indicates whenever we should emit error or
133
                          replace row if we will meet duplicates.
134
      ignore -          - indicates whenever we should ignore duplicates
135
      read_file_from_client - is this LOAD DATA LOCAL ?
136
137
  RETURN VALUES
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
138
    true - error / false - success
1 by brian
clean slate
139
*/
140
141
int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
142
	        List<Item> &fields_vars, List<Item> &set_fields,
143
                List<Item> &set_values,
144
                enum enum_duplicates handle_duplicates, bool ignore,
145
                bool read_file_from_client)
146
{
147
  char name[FN_REFLEN];
148
  File file;
149
  TABLE *table= NULL;
150
  int error;
151
  String *field_term=ex->field_term,*escaped=ex->escaped;
152
  String *enclosed=ex->enclosed;
153
  bool is_fifo=0;
154
  LOAD_FILE_INFO lf_info;
155
  char *db = table_list->db;			// This is never null
156
  /*
157
    If path for file is not defined, we will use the current database.
158
    If this is not set, we will use the directory where the table to be
159
    loaded is located
160
  */
161
  char *tdb= thd->db ? thd->db : db;		// Result is never null
162
  ulong skip_lines= ex->skip_lines;
163
  bool transactional_table;
164
  THD::killed_state killed_status= THD::NOT_KILLED;
165
166
  if (escaped->length() > 1 || enclosed->length() > 1)
167
  {
168
    my_message(ER_WRONG_FIELD_TERMINATORS,ER(ER_WRONG_FIELD_TERMINATORS),
169
	       MYF(0));
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
170
    return(true);
1 by brian
clean slate
171
  }
172
  if (open_and_lock_tables(thd, table_list))
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
173
    return(true);
1 by brian
clean slate
174
  if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
175
                                    &thd->lex->select_lex.top_join_list,
176
                                    table_list,
177
                                    &thd->lex->select_lex.leaf_tables, true))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
178
     return(-1);
1 by brian
clean slate
179
180
  /*
181
    Let us emit an error if we are loading data to table which is used
182
    in subselect in SET clause like we do it for INSERT.
183
184
    The main thing to fix to remove this restriction is to ensure that the
185
    table is marked to be 'used for insert' in which case we should never
186
    mark this table as 'const table' (ie, one that has only one row).
187
  */
188
  if (unique_table(thd, table_list, table_list->next_global, 0))
189
  {
190
    my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->table_name);
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
191
    return(true);
1 by brian
clean slate
192
  }
193
194
  table= table_list->table;
195
  transactional_table= table->file->has_transactions();
196
197
  if (!fields_vars.elements)
198
  {
199
    Field **field;
200
    for (field=table->field; *field ; field++)
201
      fields_vars.push_back(new Item_field(*field));
202
    bitmap_set_all(table->write_set);
203
    table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
204
    /*
205
      Let us also prepare SET clause, altough it is probably empty
206
      in this case.
207
    */
208
    if (setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
209
        setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0))
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
210
      return(true);
1 by brian
clean slate
211
  }
212
  else
213
  {						// Part field list
214
    /* TODO: use this conds for 'WITH CHECK OPTIONS' */
215
    if (setup_fields(thd, 0, fields_vars, MARK_COLUMNS_WRITE, 0, 0) ||
216
        setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
217
        check_that_all_fields_are_given_values(thd, table, table_list))
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
218
      return(true);
1 by brian
clean slate
219
    /*
220
      Check whenever TIMESTAMP field with auto-set feature specified
221
      explicitly.
222
    */
223
    if (table->timestamp_field)
224
    {
225
      if (bitmap_is_set(table->write_set,
226
                        table->timestamp_field->field_index))
227
        table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
228
      else
229
      {
230
        bitmap_set_bit(table->write_set,
231
                       table->timestamp_field->field_index);
232
      }
233
    }
234
    /* Fix the expressions in SET clause */
235
    if (setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0))
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
236
      return(true);
1 by brian
clean slate
237
  }
238
239
  table->mark_columns_needed_for_insert();
240
241
  uint tot_length=0;
242
  bool use_blobs= 0, use_vars= 0;
243
  List_iterator_fast<Item> it(fields_vars);
244
  Item *item;
245
246
  while ((item= it++))
247
  {
248
    Item *real_item= item->real_item();
249
250
    if (real_item->type() == Item::FIELD_ITEM)
251
    {
252
      Field *field= ((Item_field*)real_item)->field;
253
      if (field->flags & BLOB_FLAG)
254
      {
255
        use_blobs= 1;
256
        tot_length+= 256;			// Will be extended if needed
257
      }
258
      else
259
        tot_length+= field->field_length;
260
    }
261
    else if (item->type() == Item::STRING_ITEM)
262
      use_vars= 1;
263
  }
264
  if (use_blobs && !ex->line_term->length() && !field_term->length())
265
  {
266
    my_message(ER_BLOBS_AND_NO_TERMINATED,ER(ER_BLOBS_AND_NO_TERMINATED),
267
	       MYF(0));
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
268
    return(true);
1 by brian
clean slate
269
  }
270
  if (use_vars && !field_term->length() && !enclosed->length())
271
  {
272
    my_error(ER_LOAD_FROM_FIXED_SIZE_ROWS_TO_VAR, MYF(0));
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
273
    return(true);
1 by brian
clean slate
274
  }
275
276
  /* We can't give an error in the middle when using LOCAL files */
277
  if (read_file_from_client && handle_duplicates == DUP_ERROR)
278
    ignore= 1;
279
280
  if (read_file_from_client)
281
  {
282
    (void)net_request_file(&thd->net,ex->file_name);
283
    file = -1;
284
  }
285
  else
286
  {
287
#ifdef DONT_ALLOW_FULL_LOAD_DATA_PATHS
288
    ex->file_name+=dirname_length(ex->file_name);
289
#endif
290
    if (!dirname_length(ex->file_name))
291
    {
292
      strxnmov(name, FN_REFLEN-1, mysql_real_data_home, tdb, NullS);
293
      (void) fn_format(name, ex->file_name, name, "",
294
		       MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
295
    }
296
    else
297
    {
298
      (void) fn_format(name, ex->file_name, mysql_real_data_home, "",
299
		       MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
300
301
      if (opt_secure_file_priv &&
302
          strncmp(opt_secure_file_priv, name, strlen(opt_secure_file_priv)))
303
      {
304
        /* Read only allowed from within dir specified by secure_file_priv */
305
        my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--secure-file-priv");
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
306
        return(true);
1 by brian
clean slate
307
      }
308
15 by brian
Fix for stat, NETWARE removal
309
      struct stat stat_info;
310
      if (stat(name,&stat_info))
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
311
	return(true);
1 by brian
clean slate
312
313
      // if we are not in slave thread, the file must be:
314
      if (!thd->slave_thread &&
315
	  !((stat_info.st_mode & S_IROTH) == S_IROTH &&  // readable by others
316
	    (stat_info.st_mode & S_IFLNK) != S_IFLNK && // and not a symlink
317
	    ((stat_info.st_mode & S_IFREG) == S_IFREG ||
318
	     (stat_info.st_mode & S_IFIFO) == S_IFIFO)))
319
      {
320
	my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), name);
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
321
	return(true);
1 by brian
clean slate
322
      }
323
      if ((stat_info.st_mode & S_IFIFO) == S_IFIFO)
324
	is_fifo = 1;
325
    }
326
    if ((file=my_open(name,O_RDONLY,MYF(MY_WME))) < 0)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
327
      return(true);
1 by brian
clean slate
328
  }
329
330
  COPY_INFO info;
331
  bzero((char*) &info,sizeof(info));
332
  info.ignore= ignore;
333
  info.handle_duplicates=handle_duplicates;
334
  info.escape_char=escaped->length() ? (*escaped)[0] : INT_MAX;
335
336
  READ_INFO read_info(file,tot_length,
337
                      ex->cs ? ex->cs : thd->variables.collation_database,
338
		      *field_term,*ex->line_start, *ex->line_term, *enclosed,
339
		      info.escape_char, read_file_from_client, is_fifo);
340
  if (read_info.error)
341
  {
342
    if	(file >= 0)
343
      my_close(file,MYF(0));			// no files in net reading
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
344
    return(true);				// Can't allocate buffers
1 by brian
clean slate
345
  }
346
347
  if (mysql_bin_log.is_open())
348
  {
349
    lf_info.thd = thd;
350
    lf_info.wrote_create_file = 0;
351
    lf_info.last_pos_in_file = HA_POS_ERROR;
352
    lf_info.log_delayed= transactional_table;
353
    read_info.set_io_cache_arg((void*) &lf_info);
354
  }
355
356
  thd->count_cuted_fields= CHECK_FIELD_WARN;		/* calc cuted fields */
357
  thd->cuted_fields=0L;
358
  /* Skip lines if there is a line terminator */
359
  if (ex->line_term->length() && ex->filetype != FILETYPE_XML)
360
  {
361
    /* ex->skip_lines needs to be preserved for logging */
362
    while (skip_lines > 0)
363
    {
364
      skip_lines--;
365
      if (read_info.next_line())
366
	break;
367
    }
368
  }
369
370
  if (!(error=test(read_info.error)))
371
  {
372
373
    table->next_number_field=table->found_next_number_field;
374
    if (ignore ||
375
	handle_duplicates == DUP_REPLACE)
376
      table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
377
    if (handle_duplicates == DUP_REPLACE)
378
        table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE);
379
    table->file->ha_start_bulk_insert((ha_rows) 0);
380
    table->copy_blobs=1;
381
382
    thd->abort_on_warning= (!ignore &&
383
                            (thd->variables.sql_mode &
384
                             (MODE_STRICT_TRANS_TABLES |
385
                              MODE_STRICT_ALL_TABLES)));
386
387
    if (ex->filetype == FILETYPE_XML) /* load xml */
388
      error= read_xml_field(thd, info, table_list, fields_vars,
389
                            set_fields, set_values, read_info,
390
                            *(ex->line_term), skip_lines, ignore);
391
    else if (!field_term->length() && !enclosed->length())
392
      error= read_fixed_length(thd, info, table_list, fields_vars,
393
                               set_fields, set_values, read_info,
394
			       skip_lines, ignore);
395
    else
396
      error= read_sep_field(thd, info, table_list, fields_vars,
397
                            set_fields, set_values, read_info,
398
			    *enclosed, skip_lines, ignore);
399
    if (table->file->ha_end_bulk_insert() && !error)
400
    {
401
      table->file->print_error(my_errno, MYF(0));
402
      error= 1;
403
    }
404
    table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
405
    table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
406
    table->next_number_field=0;
407
  }
408
  if (file >= 0)
409
    my_close(file,MYF(0));
410
  free_blobs(table);				/* if pack_blob was used */
411
  table->copy_blobs=0;
412
  thd->count_cuted_fields= CHECK_FIELD_IGNORE;
413
  /* 
414
     simulated killing in the middle of per-row loop
415
     must be effective for binlogging
416
  */
417
  killed_status= (error == 0)? THD::NOT_KILLED : thd->killed;
418
  if (error)
419
  {
420
    if (read_file_from_client)
421
      while (!read_info.next_line())
422
	;
423
424
    if (mysql_bin_log.is_open())
425
    {
426
      {
427
	/*
428
	  Make sure last block (the one which caused the error) gets
429
	  logged.  This is needed because otherwise after write of (to
430
	  the binlog, not to read_info (which is a cache))
431
	  Delete_file_log_event the bad block will remain in read_info
432
	  (because pre_read is not called at the end of the last
433
	  block; remember pre_read is called whenever a new block is
434
	  read from disk).  At the end of mysql_load(), the destructor
435
	  of read_info will call end_io_cache() which will flush
436
	  read_info, so we will finally have this in the binlog:
437
438
	  Append_block # The last successfull block
439
	  Delete_file
440
	  Append_block # The failing block
441
	  which is nonsense.
442
	  Or could also be (for a small file)
443
	  Create_file  # The failing block
444
	  which is nonsense (Delete_file is not written in this case, because:
445
	  Create_file has not been written, so Delete_file is not written, then
446
	  when read_info is destroyed end_io_cache() is called which writes
447
	  Create_file.
448
	*/
449
	read_info.end_io_cache();
450
	/* If the file was not empty, wrote_create_file is true */
451
	if (lf_info.wrote_create_file)
452
	{
453
	  if (thd->transaction.stmt.modified_non_trans_table)
454
	    write_execute_load_query_log_event(thd, handle_duplicates,
455
					       ignore, transactional_table,
456
                                               killed_status);
457
	  else
458
	  {
459
	    Delete_file_log_event d(thd, db, transactional_table);
460
            d.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
461
	    mysql_bin_log.write(&d);
462
	  }
463
	}
464
      }
465
    }
466
    error= -1;				// Error on read
467
    goto err;
468
  }
469
  sprintf(name, ER(ER_LOAD_INFO), (ulong) info.records, (ulong) info.deleted,
470
	  (ulong) (info.records - info.copied), (ulong) thd->cuted_fields);
471
472
  if (thd->transaction.stmt.modified_non_trans_table)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
473
    thd->transaction.all.modified_non_trans_table= true;
1 by brian
clean slate
474
475
  if (mysql_bin_log.is_open())
476
  {
477
    /*
478
      We need to do the job that is normally done inside
479
      binlog_query() here, which is to ensure that the pending event
480
      is written before tables are unlocked and before any other
481
      events are written.  We also need to update the table map
482
      version for the binary log to mark that table maps are invalid
483
      after this point.
484
     */
485
    if (thd->current_stmt_binlog_row_based)
486
      thd->binlog_flush_pending_rows_event(true);
487
    else
488
    {
489
      /*
490
        As already explained above, we need to call end_io_cache() or the last
491
        block will be logged only after Execute_load_query_log_event (which is
492
        wrong), when read_info is destroyed.
493
      */
494
      read_info.end_io_cache();
495
      if (lf_info.wrote_create_file)
496
      {
497
        write_execute_load_query_log_event(thd, handle_duplicates, ignore,
498
                                           transactional_table,killed_status);
499
      }
500
    }
501
  }
502
503
  /* ok to client sent only after binlog write and engine commit */
504
  my_ok(thd, info.copied + info.deleted, 0L, name);
505
err:
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
506
  assert(transactional_table || !(info.copied || info.deleted) ||
1 by brian
clean slate
507
              thd->transaction.stmt.modified_non_trans_table);
508
  table->file->ha_release_auto_increment();
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
509
  table->auto_increment_field_not_null= false;
1 by brian
clean slate
510
  thd->abort_on_warning= 0;
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
511
  return(error);
1 by brian
clean slate
512
}
513
514
515
/* Not a very useful function; just to avoid duplication of code */
516
static bool write_execute_load_query_log_event(THD *thd,
517
					       bool duplicates, bool ignore,
518
					       bool transactional_table,
519
                                               THD::killed_state killed_err_arg)
520
{
521
  Execute_load_query_log_event
522
    e(thd, thd->query, thd->query_length,
523
      (char*)thd->lex->fname_start - (char*)thd->query,
524
      (char*)thd->lex->fname_end - (char*)thd->query,
525
      (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE :
526
      (ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR),
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
527
      transactional_table, false, killed_err_arg);
1 by brian
clean slate
528
  e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
529
  return mysql_bin_log.write(&e);
530
}
531
532
533
/****************************************************************************
534
** Read of rows of fixed size + optional garage + optonal newline
535
****************************************************************************/
536
537
static int
538
read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
539
                  List<Item> &fields_vars, List<Item> &set_fields,
540
                  List<Item> &set_values, READ_INFO &read_info,
541
                  ulong skip_lines, bool ignore_check_option_errors)
542
{
543
  List_iterator_fast<Item> it(fields_vars);
544
  Item_field *sql_field;
545
  TABLE *table= table_list->table;
151 by Brian Aker
Ulonglong to uint64_t
546
  uint64_t id;
1 by brian
clean slate
547
  bool err;
548
549
  id= 0;
550
 
551
  while (!read_info.read_fixed_length())
552
  {
553
    if (thd->killed)
554
    {
555
      thd->send_kill_message();
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
556
      return(1);
1 by brian
clean slate
557
    }
558
    if (skip_lines)
559
    {
560
      /*
561
	We could implement this with a simple seek if:
562
	- We are not using DATA INFILE LOCAL
563
	- escape character is  ""
564
	- line starting prefix is ""
565
      */
566
      skip_lines--;
567
      continue;
568
    }
569
    it.rewind();
570
    uchar *pos=read_info.row_start;
571
#ifdef HAVE_purify
572
    read_info.row_end[0]=0;
573
#endif
574
575
    restore_record(table, s->default_values);
576
    /*
577
      There is no variables in fields_vars list in this format so
578
      this conversion is safe.
579
    */
580
    while ((sql_field= (Item_field*) it++))
581
    {
582
      Field *field= sql_field->field;                  
583
      if (field == table->next_number_field)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
584
        table->auto_increment_field_not_null= true;
1 by brian
clean slate
585
      /*
586
        No fields specified in fields_vars list can be null in this format.
587
        Mark field as not null, we should do this for each row because of
588
        restore_record...
589
      */
590
      field->set_notnull();
591
592
      if (pos == read_info.row_end)
593
      {
594
        thd->cuted_fields++;			/* Not enough fields */
595
        push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 
596
                            ER_WARN_TOO_FEW_RECORDS, 
597
                            ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
598
        if (!field->maybe_null() && field->type() == FIELD_TYPE_TIMESTAMP)
599
            ((Field_timestamp*) field)->set_time();
600
      }
601
      else
602
      {
603
	uint length;
604
	uchar save_chr;
605
	if ((length=(uint) (read_info.row_end-pos)) >
606
	    field->field_length)
607
	  length=field->field_length;
608
	save_chr=pos[length]; pos[length]='\0'; // Safeguard aganst malloc
609
        field->store((char*) pos,length,read_info.read_charset);
610
	pos[length]=save_chr;
611
	if ((pos+=length) > read_info.row_end)
612
	  pos= read_info.row_end;	/* Fills rest with space */
613
      }
614
    }
615
    if (pos != read_info.row_end)
616
    {
617
      thd->cuted_fields++;			/* To long row */
618
      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 
619
                          ER_WARN_TOO_MANY_RECORDS, 
620
                          ER(ER_WARN_TOO_MANY_RECORDS), thd->row_count); 
621
    }
622
623
    if (thd->killed ||
624
        fill_record(thd, set_fields, set_values,
625
                    ignore_check_option_errors))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
626
      return(1);
1 by brian
clean slate
627
628
    err= write_record(thd, table, &info);
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
629
    table->auto_increment_field_not_null= false;
1 by brian
clean slate
630
    if (err)
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
631
      return(1);
1 by brian
clean slate
632
   
633
    /*
634
      We don't need to reset auto-increment field since we are restoring
635
      its default value at the beginning of each loop iteration.
636
    */
637
    if (read_info.next_line())			// Skip to next line
638
      break;
639
    if (read_info.line_cuted)
640
    {
641
      thd->cuted_fields++;			/* To long row */
642
      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 
643
                          ER_WARN_TOO_MANY_RECORDS, 
644
                          ER(ER_WARN_TOO_MANY_RECORDS), thd->row_count); 
645
    }
646
    thd->row_count++;
647
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
648
  return(test(read_info.error));
1 by brian
clean slate
649
}
650
651
652
653
static int
654
read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
655
               List<Item> &fields_vars, List<Item> &set_fields,
656
               List<Item> &set_values, READ_INFO &read_info,
657
	       String &enclosed, ulong skip_lines,
658
	       bool ignore_check_option_errors)
659
{
660
  List_iterator_fast<Item> it(fields_vars);
661
  Item *item;
662
  TABLE *table= table_list->table;
663
  uint enclosed_length;
151 by Brian Aker
Ulonglong to uint64_t
664
  uint64_t id;
1 by brian
clean slate
665
  bool err;
666
667
  enclosed_length=enclosed.length();
668
  id= 0;
669
670
  for (;;it.rewind())
671
  {
672
    if (thd->killed)
673
    {
674
      thd->send_kill_message();
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
675
      return(1);
1 by brian
clean slate
676
    }
677
678
    restore_record(table, s->default_values);
679
680
    while ((item= it++))
681
    {
682
      uint length;
683
      uchar *pos;
684
      Item *real_item;
685
686
      if (read_info.read_field())
687
	break;
688
689
      /* If this line is to be skipped we don't want to fill field or var */
690
      if (skip_lines)
691
        continue;
692
693
      pos=read_info.row_start;
694
      length=(uint) (read_info.row_end-pos);
695
696
      real_item= item->real_item();
697
698
      if ((!read_info.enclosed && (enclosed_length && length == 4 && !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
699
	  (length == 1 && read_info.found_null))
700
      {
701
702
        if (real_item->type() == Item::FIELD_ITEM)
703
        {
704
          Field *field= ((Item_field *)real_item)->field;
705
          if (field->reset())
706
          {
707
            my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0), field->field_name,
708
                     thd->row_count);
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
709
            return(1);
1 by brian
clean slate
710
          }
711
          field->set_null();
712
          if (!field->maybe_null())
713
          {
714
            if (field->type() == MYSQL_TYPE_TIMESTAMP)
715
              ((Field_timestamp*) field)->set_time();
716
            else if (field != table->next_number_field)
717
              field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
718
                                 ER_WARN_NULL_TO_NOTNULL, 1);
719
          }
720
	}
721
        else if (item->type() == Item::STRING_ITEM)
722
        {
723
          ((Item_user_var_as_out_param *)item)->set_null_value(
724
                                                  read_info.read_charset);
725
        }
726
        else
727
        {
728
          my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
729
          return(1);
1 by brian
clean slate
730
        }
731
732
	continue;
733
      }
734
735
      if (real_item->type() == Item::FIELD_ITEM)
736
      {
737
        Field *field= ((Item_field *)real_item)->field;
738
        field->set_notnull();
739
        read_info.row_end[0]=0;			// Safe to change end marker
740
        if (field == table->next_number_field)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
741
          table->auto_increment_field_not_null= true;
1 by brian
clean slate
742
        field->store((char*) pos, length, read_info.read_charset);
743
      }
744
      else if (item->type() == Item::STRING_ITEM)
745
      {
746
        ((Item_user_var_as_out_param *)item)->set_value((char*) pos, length,
747
                                                        read_info.read_charset);
748
      }
749
      else
750
      {
751
        my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
752
        return(1);
1 by brian
clean slate
753
      }
754
    }
755
    if (read_info.error)
756
      break;
757
    if (skip_lines)
758
    {
759
      skip_lines--;
760
      continue;
761
    }
762
    if (item)
763
    {
764
      /* Have not read any field, thus input file is simply ended */
765
      if (item == fields_vars.head())
766
	break;
767
      for (; item ; item= it++)
768
      {
769
        Item *real_item= item->real_item();
770
        if (real_item->type() == Item::FIELD_ITEM)
771
        {
772
          Field *field= ((Item_field *)real_item)->field;
773
          if (field->reset())
774
          {
775
            my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0),field->field_name,
776
                     thd->row_count);
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
777
            return(1);
1 by brian
clean slate
778
          }
779
          if (!field->maybe_null() && field->type() == FIELD_TYPE_TIMESTAMP)
780
              ((Field_timestamp*) field)->set_time();
781
          /*
782
            QQ: We probably should not throw warning for each field.
783
            But how about intention to always have the same number
784
            of warnings in THD::cuted_fields (and get rid of cuted_fields
785
            in the end ?)
786
          */
787
          thd->cuted_fields++;
788
          push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
789
                              ER_WARN_TOO_FEW_RECORDS,
790
                              ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
791
        }
792
        else if (item->type() == Item::STRING_ITEM)
793
        {
794
          ((Item_user_var_as_out_param *)item)->set_null_value(
795
                                                  read_info.read_charset);
796
        }
797
        else
798
        {
799
          my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
800
          return(1);
1 by brian
clean slate
801
        }
802
      }
803
    }
804
805
    if (thd->killed ||
806
        fill_record(thd, set_fields, set_values,
807
                    ignore_check_option_errors))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
808
      return(1);
1 by brian
clean slate
809
810
    err= write_record(thd, table, &info);
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
811
    table->auto_increment_field_not_null= false;
1 by brian
clean slate
812
    if (err)
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
813
      return(1);
1 by brian
clean slate
814
    /*
815
      We don't need to reset auto-increment field since we are restoring
816
      its default value at the beginning of each loop iteration.
817
    */
818
    if (read_info.next_line())			// Skip to next line
819
      break;
820
    if (read_info.line_cuted)
821
    {
822
      thd->cuted_fields++;			/* To long row */
823
      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 
824
                          ER_WARN_TOO_MANY_RECORDS, ER(ER_WARN_TOO_MANY_RECORDS), 
825
                          thd->row_count);   
826
      if (thd->killed)
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
827
        return(1);
1 by brian
clean slate
828
    }
829
    thd->row_count++;
830
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
831
  return(test(read_info.error));
1 by brian
clean slate
832
}
833
834
835
/****************************************************************************
836
** Read rows in xml format
837
****************************************************************************/
838
static int
839
read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
840
               List<Item> &fields_vars, List<Item> &set_fields,
841
               List<Item> &set_values, READ_INFO &read_info,
77.1.46 by Monty Taylor
Finished the warnings work!
842
               String &row_tag __attribute__((__unused__)),
843
               ulong skip_lines,
1 by brian
clean slate
844
               bool ignore_check_option_errors)
845
{
846
  List_iterator_fast<Item> it(fields_vars);
847
  Item *item;
848
  TABLE *table= table_list->table;
849
  bool no_trans_update_stmt;
850
  CHARSET_INFO *cs= read_info.read_charset;
77.1.46 by Monty Taylor
Finished the warnings work!
851
1 by brian
clean slate
852
  no_trans_update_stmt= !table->file->has_transactions();
77.1.46 by Monty Taylor
Finished the warnings work!
853
1 by brian
clean slate
854
  for ( ; ; it.rewind())
855
  {
856
    if (thd->killed)
857
    {
858
      thd->send_kill_message();
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
859
      return(1);
1 by brian
clean slate
860
    }
861
    
862
    // read row tag and save values into tag list
863
    if (read_info.read_xml())
864
      break;
865
    
866
    List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
867
    xmlit.rewind();
868
    XML_TAG *tag= NULL;
869
    
870
    
871
    restore_record(table, s->default_values);
872
    
873
    while ((item= it++))
874
    {
875
      /* If this line is to be skipped we don't want to fill field or var */
876
      if (skip_lines)
877
        continue;
878
      
879
      /* find field in tag list */
880
      xmlit.rewind();
881
      tag= xmlit++;
882
      
883
      while(tag && strcmp(tag->field.c_ptr(), item->name) != 0)
884
        tag= xmlit++;
885
      
886
      if (!tag) // found null
887
      {
888
        if (item->type() == Item::FIELD_ITEM)
889
        {
890
          Field *field= ((Item_field *) item)->field;
891
          field->reset();
892
          field->set_null();
893
          if (field == table->next_number_field)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
894
            table->auto_increment_field_not_null= true;
1 by brian
clean slate
895
          if (!field->maybe_null())
896
          {
897
            if (field->type() == FIELD_TYPE_TIMESTAMP)
898
              ((Field_timestamp *) field)->set_time();
899
            else if (field != table->next_number_field)
900
              field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
901
                                 ER_WARN_NULL_TO_NOTNULL, 1);
902
          }
903
        }
904
        else
905
          ((Item_user_var_as_out_param *) item)->set_null_value(cs);
906
        continue;
907
      }
908
909
      if (item->type() == Item::FIELD_ITEM)
910
      {
911
912
        Field *field= ((Item_field *)item)->field;
913
        field->set_notnull();
914
        if (field == table->next_number_field)
51.1.53 by Jay Pipes
Removed/replaced DBUG symbols and standardized TRUE/FALSE
915
          table->auto_increment_field_not_null= true;
1 by brian
clean slate
916
        field->store((char *) tag->value.ptr(), tag->value.length(), cs);
917
      }
918
      else
919
        ((Item_user_var_as_out_param *) item)->set_value(
920
                                                 (char *) tag->value.ptr(), 
921
                                                 tag->value.length(), cs);
922
    }
923
    
924
    if (read_info.error)
925
      break;
926
    
927
    if (skip_lines)
928
    {
929
      skip_lines--;
930
      continue;
931
    }
932
    
933
    if (item)
934
    {
935
      /* Have not read any field, thus input file is simply ended */
936
      if (item == fields_vars.head())
937
        break;
938
      
939
      for ( ; item; item= it++)
940
      {
941
        if (item->type() == Item::FIELD_ITEM)
942
        {
943
          /*
944
            QQ: We probably should not throw warning for each field.
945
            But how about intention to always have the same number
946
            of warnings in THD::cuted_fields (and get rid of cuted_fields
947
            in the end ?)
948
          */
949
          thd->cuted_fields++;
950
          push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
951
                              ER_WARN_TOO_FEW_RECORDS,
952
                              ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
953
        }
954
        else
955
          ((Item_user_var_as_out_param *)item)->set_null_value(cs);
956
      }
957
    }
958
959
    if (thd->killed || fill_record(thd, set_fields, set_values,
960
                    ignore_check_option_errors))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
961
      return(1);
1 by brian
clean slate
962
963
    if (write_record(thd, table, &info))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
964
      return(1);
1 by brian
clean slate
965
    
966
    /*
967
      We don't need to reset auto-increment field since we are restoring
968
      its default value at the beginning of each loop iteration.
969
    */
970
    thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt;
971
    thd->row_count++;
972
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
973
  return(test(read_info.error));
1 by brian
clean slate
974
} /* load xml end */
975
976
977
/* Unescape all escape characters, mark \N as null */
978
979
char
980
READ_INFO::unescape(char chr)
981
{
982
  /* keep this switch synchornous with the ESCAPE_CHARS macro */
983
  switch(chr) {
984
  case 'n': return '\n';
985
  case 't': return '\t';
986
  case 'r': return '\r';
987
  case 'b': return '\b';
988
  case '0': return 0;				// Ascii null
989
  case 'Z': return '\032';			// Win32 end of file
990
  case 'N': found_null=1;
991
992
    /* fall through */
993
  default:  return chr;
994
  }
995
}
996
997
998
/*
999
  Read a line using buffering
1000
  If last line is empty (in line mode) then it isn't outputed
1001
*/
1002
1003
1004
READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
1005
		     String &field_term, String &line_start, String &line_term,
1006
		     String &enclosed_par, int escape, bool get_it_from_net,
1007
		     bool is_fifo)
1008
  :file(file_par),escape_char(escape)
1009
{
1010
  read_charset= cs;
1011
  field_term_ptr=(char*) field_term.ptr();
1012
  field_term_length= field_term.length();
1013
  line_term_ptr=(char*) line_term.ptr();
1014
  line_term_length= line_term.length();
1015
  level= 0; /* for load xml */
1016
  if (line_start.length() == 0)
1017
  {
1018
    line_start_ptr=0;
1019
    start_of_line= 0;
1020
  }
1021
  else
1022
  {
1023
    line_start_ptr=(char*) line_start.ptr();
1024
    line_start_end=line_start_ptr+line_start.length();
1025
    start_of_line= 1;
1026
  }
1027
  /* If field_terminator == line_terminator, don't use line_terminator */
1028
  if (field_term_length == line_term_length &&
1029
      !memcmp(field_term_ptr,line_term_ptr,field_term_length))
1030
  {
1031
    line_term_length=0;
1032
    line_term_ptr=(char*) "";
1033
  }
1034
  enclosed_char= (enclosed_length=enclosed_par.length()) ?
1035
    (uchar) enclosed_par[0] : INT_MAX;
1036
  field_term_char= field_term_length ? (uchar) field_term_ptr[0] : INT_MAX;
1037
  line_term_char= line_term_length ? (uchar) line_term_ptr[0] : INT_MAX;
1038
  error=eof=found_end_of_line=found_null=line_cuted=0;
1039
  buff_length=tot_length;
1040
1041
1042
  /* Set of a stack for unget if long terminators */
1043
  uint length=max(field_term_length,line_term_length)+1;
1044
  set_if_bigger(length,line_start.length());
1045
  stack=stack_pos=(int*) sql_alloc(sizeof(int)*length);
1046
1047
  if (!(buffer=(uchar*) my_malloc(buff_length+1,MYF(0))))
1048
    error=1; /* purecov: inspected */
1049
  else
1050
  {
1051
    end_of_buff=buffer+buff_length;
1052
    if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0,
1053
		      (get_it_from_net) ? READ_NET :
1054
		      (is_fifo ? READ_FIFO : READ_CACHE),0L,1,
1055
		      MYF(MY_WME)))
1056
    {
1057
      my_free((uchar*) buffer,MYF(0)); /* purecov: inspected */
1058
      error=1;
1059
    }
1060
    else
1061
    {
1062
      /*
1063
	init_io_cache() will not initialize read_function member
1064
	if the cache is READ_NET. So we work around the problem with a
1065
	manual assignment
1066
      */
1067
      need_end_io_cache = 1;
1068
1069
      if (get_it_from_net)
1070
	cache.read_function = _my_b_net_read;
1071
1072
      if (mysql_bin_log.is_open())
1073
	cache.pre_read = cache.pre_close =
1074
	  (IO_CACHE_CALLBACK) log_loaded_block;
1075
    }
1076
  }
1077
}
1078
1079
1080
/*
  Release the IO cache and the row buffer (only when 'error' is not set)
  and delete every XML tag still held in taglist.
*/
READ_INFO::~READ_INFO()
{
  const bool release_resources= !error;

  if (release_resources && need_end_io_cache)
    ::end_io_cache(&cache);
  if (release_resources)
  {
    my_free((uchar*) buffer,MYF(0));
    error=1;
  }

  List_iterator<XML_TAG> xmlit(taglist);
  for (XML_TAG *tag= xmlit++; tag; tag= xmlit++)
    delete tag;
}
1094
1095
1096
/*
  Fetch the next input value: the most recently pushed-back value when the
  unget stack is not empty, otherwise whatever my_b_get() returns for the
  IO cache.
*/
#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache))
1097
/* Store a value on the unget stack; GET returns the last stored value first */
#define PUSH(A) *(stack_pos++)=(A)
1098
1099
1100
inline int READ_INFO::terminator(char *ptr,uint length)
1101
{
1102
  int chr=0;					// Keep gcc happy
1103
  uint i;
1104
  for (i=1 ; i < length ; i++)
1105
  {
1106
    if ((chr=GET) != *++ptr)
1107
    {
1108
      break;
1109
    }
1110
  }
1111
  if (i == length)
1112
    return 1;
1113
  PUSH(chr);
1114
  while (i-- > 1)
1115
    PUSH((uchar) *--ptr);
1116
  return 0;
1117
}
1118
1119
1120
int READ_INFO::read_field()
1121
{
1122
  int chr,found_enclosed_char;
1123
  uchar *to,*new_buffer;
1124
1125
  found_null=0;
1126
  if (found_end_of_line)
1127
    return 1;					// One have to call next_line
1128
1129
  /* Skip until we find 'line_start' */
1130
1131
  if (start_of_line)
1132
  {						// Skip until line_start
1133
    start_of_line=0;
1134
    if (find_start_of_fields())
1135
      return 1;
1136
  }
1137
  if ((chr=GET) == my_b_EOF)
1138
  {
1139
    found_end_of_line=eof=1;
1140
    return 1;
1141
  }
1142
  to=buffer;
1143
  if (chr == enclosed_char)
1144
  {
1145
    found_enclosed_char=enclosed_char;
1146
    *to++=(uchar) chr;				// If error
1147
  }
1148
  else
1149
  {
1150
    found_enclosed_char= INT_MAX;
1151
    PUSH(chr);
1152
  }
1153
1154
  for (;;)
1155
  {
1156
    while ( to < end_of_buff)
1157
    {
1158
      chr = GET;
1159
#ifdef USE_MB
1160
      if ((my_mbcharlen(read_charset, chr) > 1) &&
1161
          to+my_mbcharlen(read_charset, chr) <= end_of_buff)
1162
      {
1163
	  uchar* p = (uchar*)to;
1164
	  *to++ = chr;
1165
	  int ml = my_mbcharlen(read_charset, chr);
1166
	  int i;
1167
	  for (i=1; i<ml; i++) {
1168
	      chr = GET;
1169
	      if (chr == my_b_EOF)
1170
		  goto found_eof;
1171
	      *to++ = chr;
1172
	  }
1173
	  if (my_ismbchar(read_charset,
1174
                          (const char *)p,
1175
                          (const char *)to))
1176
	    continue;
1177
	  for (i=0; i<ml; i++)
1178
	    PUSH((uchar) *--to);
1179
	  chr = GET;
1180
      }
1181
#endif
1182
      if (chr == my_b_EOF)
1183
	goto found_eof;
1184
      if (chr == escape_char)
1185
      {
1186
	if ((chr=GET) == my_b_EOF)
1187
	{
1188
	  *to++= (uchar) escape_char;
1189
	  goto found_eof;
1190
	}
1191
        /*
1192
          When escape_char == enclosed_char, we treat it like we do for
1193
          handling quotes in SQL parsing -- you can double-up the
1194
          escape_char to include it literally, but it doesn't do escapes
1195
          like \n. This allows: LOAD DATA ... ENCLOSED BY '"' ESCAPED BY '"'
1196
          with data like: "fie""ld1", "field2"
1197
         */
1198
        if (escape_char != enclosed_char || chr == escape_char)
1199
        {
1200
          *to++ = (uchar) unescape((char) chr);
1201
          continue;
1202
        }
1203
        PUSH(chr);
1204
        chr= escape_char;
1205
      }
1206
#ifdef ALLOW_LINESEPARATOR_IN_STRINGS
1207
      if (chr == line_term_char)
1208
#else
1209
      if (chr == line_term_char && found_enclosed_char == INT_MAX)
1210
#endif
1211
      {
1212
	if (terminator(line_term_ptr,line_term_length))
1213
	{					// Maybe unexpected linefeed
1214
	  enclosed=0;
1215
	  found_end_of_line=1;
1216
	  row_start=buffer;
1217
	  row_end=  to;
1218
	  return 0;
1219
	}
1220
      }
1221
      if (chr == found_enclosed_char)
1222
      {
1223
	if ((chr=GET) == found_enclosed_char)
1224
	{					// Remove dupplicated
1225
	  *to++ = (uchar) chr;
1226
	  continue;
1227
	}
1228
	// End of enclosed field if followed by field_term or line_term
1229
	if (chr == my_b_EOF ||
1230
	    (chr == line_term_char && terminator(line_term_ptr, line_term_length)))
1231
	{					// Maybe unexpected linefeed
1232
	  enclosed=1;
1233
	  found_end_of_line=1;
1234
	  row_start=buffer+1;
1235
	  row_end=  to;
1236
	  return 0;
1237
	}
1238
	if (chr == field_term_char &&
1239
	    terminator(field_term_ptr,field_term_length))
1240
	{
1241
	  enclosed=1;
1242
	  row_start=buffer+1;
1243
	  row_end=  to;
1244
	  return 0;
1245
	}
1246
	/*
1247
	  The string didn't terminate yet.
1248
	  Store back next character for the loop
1249
	*/
1250
	PUSH(chr);
1251
	/* copy the found term character to 'to' */
1252
	chr= found_enclosed_char;
1253
      }
1254
      else if (chr == field_term_char && found_enclosed_char == INT_MAX)
1255
      {
1256
	if (terminator(field_term_ptr,field_term_length))
1257
	{
1258
	  enclosed=0;
1259
	  row_start=buffer;
1260
	  row_end=  to;
1261
	  return 0;
1262
	}
1263
      }
1264
      *to++ = (uchar) chr;
1265
    }
1266
    /*
1267
    ** We come here if buffer is too small. Enlarge it and continue
1268
    */
1269
    if (!(new_buffer=(uchar*) my_realloc((char*) buffer,buff_length+1+IO_SIZE,
1270
					MYF(MY_WME))))
1271
      return (error=1);
1272
    to=new_buffer + (to-buffer);
1273
    buffer=new_buffer;
1274
    buff_length+=IO_SIZE;
1275
    end_of_buff=buffer+buff_length;
1276
  }
1277
1278
found_eof:
1279
  enclosed=0;
1280
  found_end_of_line=eof=1;
1281
  row_start=buffer;
1282
  row_end=to;
1283
  return 0;
1284
}
1285
1286
/*
1287
  Read a row with fixed length.
1288
1289
  NOTES
1290
    The row may not be fixed size on disk if there are escape
1291
    characters in the file.
1292
1293
  IMPLEMENTATION NOTE
1294
    One can't use fixed length with multi-byte charset **
1295
1296
  RETURN
1297
    0  ok
1298
    1  error
1299
*/
1300
1301
int READ_INFO::read_fixed_length()
1302
{
1303
  int chr;
1304
  uchar *to;
1305
  if (found_end_of_line)
1306
    return 1;					// One have to call next_line
1307
1308
  if (start_of_line)
1309
  {						// Skip until line_start
1310
    start_of_line=0;
1311
    if (find_start_of_fields())
1312
      return 1;
1313
  }
1314
1315
  to=row_start=buffer;
1316
  while (to < end_of_buff)
1317
  {
1318
    if ((chr=GET) == my_b_EOF)
1319
      goto found_eof;
1320
    if (chr == escape_char)
1321
    {
1322
      if ((chr=GET) == my_b_EOF)
1323
      {
1324
	*to++= (uchar) escape_char;
1325
	goto found_eof;
1326
      }
1327
      *to++ =(uchar) unescape((char) chr);
1328
      continue;
1329
    }
1330
    if (chr == line_term_char)
1331
    {
1332
      if (terminator(line_term_ptr,line_term_length))
1333
      {						// Maybe unexpected linefeed
1334
	found_end_of_line=1;
1335
	row_end=  to;
1336
	return 0;
1337
      }
1338
    }
1339
    *to++ = (uchar) chr;
1340
  }
1341
  row_end=to;					// Found full line
1342
  return 0;
1343
1344
found_eof:
1345
  found_end_of_line=eof=1;
1346
  row_start=buffer;
1347
  row_end=to;
1348
  return to == buffer ? 1 : 0;
1349
}
1350
1351
1352
int READ_INFO::next_line()
1353
{
1354
  line_cuted=0;
1355
  start_of_line= line_start_ptr != 0;
1356
  if (found_end_of_line || eof)
1357
  {
1358
    found_end_of_line=0;
1359
    return eof;
1360
  }
1361
  found_end_of_line=0;
1362
  if (!line_term_length)
1363
    return 0;					// No lines
1364
  for (;;)
1365
  {
1366
    int chr = GET;
1367
#ifdef USE_MB
1368
   if (my_mbcharlen(read_charset, chr) > 1)
1369
   {
1370
       for (uint i=1;
1371
            chr != my_b_EOF && i<my_mbcharlen(read_charset, chr);
1372
            i++)
1373
	   chr = GET;
1374
       if (chr == escape_char)
1375
	   continue;
1376
   }
1377
#endif
1378
   if (chr == my_b_EOF)
1379
   {
1380
      eof=1;
1381
      return 1;
1382
    }
1383
    if (chr == escape_char)
1384
    {
1385
      line_cuted=1;
1386
      if (GET == my_b_EOF)
1387
	return 1;
1388
      continue;
1389
    }
1390
    if (chr == line_term_char && terminator(line_term_ptr,line_term_length))
1391
      return 0;
1392
    line_cuted=1;
1393
  }
1394
}
1395
1396
1397
bool READ_INFO::find_start_of_fields()
1398
{
1399
  int chr;
1400
 try_again:
1401
  do
1402
  {
1403
    if ((chr=GET) == my_b_EOF)
1404
    {
1405
      found_end_of_line=eof=1;
1406
      return 1;
1407
    }
1408
  } while ((char) chr != line_start_ptr[0]);
1409
  for (char *ptr=line_start_ptr+1 ; ptr != line_start_end ; ptr++)
1410
  {
1411
    chr=GET;					// Eof will be checked later
1412
    if ((char) chr != *ptr)
1413
    {						// Can't be line_start
1414
      PUSH(chr);
1415
      while (--ptr != line_start_ptr)
1416
      {						// Restart with next char
1417
	PUSH((uchar) *ptr);
1418
      }
1419
      goto try_again;
1420
    }
1421
  }
1422
  return 0;
1423
}
1424
1425
1426
/*
1427
  Clear taglist from tags with a specified level
1428
*/
1429
int READ_INFO::clear_level(int level)
1430
{
1431
  List_iterator<XML_TAG> xmlit(taglist);
1432
  xmlit.rewind();
1433
  XML_TAG *tag;
1434
  
1435
  while ((tag= xmlit++))
1436
  {
1437
     if(tag->level >= level)
1438
     {
1439
       xmlit.remove();
1440
       delete tag;
1441
     }
1442
  }
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1443
  return(0);
1 by brian
clean slate
1444
}
1445
1446
1447
/*
  Convert the name of a predefined XML entity (gt, lt, amp, quot, apos)
  to the character it stands for.
  Only the first 'length' bytes of 'name' are looked at.
  Return -1 if the name is not one of the known entities.
*/
static int
my_xml_entity_to_char(const char *name, uint length)
{
  static const struct
  {
    const char *entity;
    uint entity_length;
    int chr;
  } known_entities[]=
  {
    { "gt",   2, '>'  },
    { "lt",   2, '<'  },
    { "amp",  3, '&'  },
    { "quot", 4, '"'  },
    { "apos", 4, '\'' }
  };
  const uint count= sizeof(known_entities) / sizeof(known_entities[0]);

  for (uint i= 0; i < count; i++)
  {
    if (length == known_entities[i].entity_length &&
        !memcmp(name, known_entities[i].entity, length))
      return known_entities[i].chr;
  }
  return -1;
}
1475
1476
1477
/**
  @brief Convert carriage return, line feed and tab to space

  @param chr    character

  @return a space (#x20) for tab, carriage return and line feed;
          any other value is returned unchanged

  @details According to the "XML 1.0" standard,
           only space (#x20) characters, carriage returns,
           line feeds or tabs are considered as spaces.
           Convert all of them to space (#x20) for parsing simplicity.
*/
static int
my_tospace(int chr)
{
  switch (chr) {
  case '\t':
  case '\r':
  case '\n':
    return ' ';
  default:
    return chr;
  }
}
1492
1493
1494
/*
1495
  Read an xml value: handle multibyte and xml escape
1496
*/
1497
int READ_INFO::read_value(int delim, String *val)
1498
{
1499
  int chr;
1500
  String tmp;
1501
1502
  for (chr= my_tospace(GET); chr != delim && chr != my_b_EOF; )
1503
  {
1504
#ifdef USE_MB
1505
    if (my_mbcharlen(read_charset, chr) > 1)
1506
    {
1507
      int i, ml= my_mbcharlen(read_charset, chr);
1508
      for (i= 1; i < ml; i++) 
1509
      {
1510
        val->append(chr);
1511
        /*
1512
          Don't use my_tospace() in the middle of a multi-byte character
1513
          TODO: check that the multi-byte sequence is valid.
1514
        */
1515
        chr= GET; 
1516
        if (chr == my_b_EOF)
1517
          return chr;
1518
      }
1519
    }
1520
#endif
1521
    if(chr == '&')
1522
    {
1523
      tmp.length(0);
1524
      for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET))
1525
      {
1526
        if (chr == my_b_EOF)
1527
          return chr;
1528
        tmp.append(chr);
1529
      }
1530
      if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
1531
        val->append(chr);
1532
      else
1533
      {
1534
        val->append('&');
1535
        val->append(tmp);
1536
        val->append(';'); 
1537
      }
1538
    }
1539
    else
1540
      val->append(chr);
1541
    chr= my_tospace(GET);
1542
  }            
1543
  return chr;
1544
}
1545
1546
1547
/*
1548
  Read a record in xml format
1549
  tags and attributes are stored in taglist
1550
  when tag set in ROWS IDENTIFIED BY is closed, we are ready and return
1551
*/
1552
int READ_INFO::read_xml()
1553
{
1554
  int chr, chr2, chr3;
1555
  int delim= 0;
1556
  String tag, attribute, value;
1557
  bool in_tag= false;
1558
  
1559
  tag.length(0);
1560
  attribute.length(0);
1561
  value.length(0);
1562
  
1563
  for (chr= my_tospace(GET); chr != my_b_EOF ; )
1564
  {
1565
    switch(chr){
1566
    case '<':  /* read tag */
1567
        /* TODO: check if this is a comment <!-- comment -->  */
1568
      chr= my_tospace(GET);
1569
      if(chr == '!')
1570
      {
1571
        chr2= GET;
1572
        chr3= GET;
1573
        
1574
        if(chr2 == '-' && chr3 == '-')
1575
        {
1576
          chr2= 0;
1577
          chr3= 0;
1578
          chr= my_tospace(GET);
1579
          
1580
          while(chr != '>' || chr2 != '-' || chr3 != '-')
1581
          {
1582
            if(chr == '-')
1583
            {
1584
              chr3= chr2;
1585
              chr2= chr;
1586
            }
1587
            else if (chr2 == '-')
1588
            {
1589
              chr2= 0;
1590
              chr3= 0;
1591
            }
1592
            chr= my_tospace(GET);
1593
            if (chr == my_b_EOF)
1594
              goto found_eof;
1595
          }
1596
          break;
1597
        }
1598
      }
1599
      
1600
      tag.length(0);
1601
      while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF)
1602
      {
1603
        if(chr != delim) /* fix for the '<field name =' format */
1604
          tag.append(chr);
1605
        chr= my_tospace(GET);
1606
      }
1607
      
1608
      if(chr == ' ' || chr == '>')
1609
      {
1610
        level++;
1611
        clear_level(level + 1);
1612
      }
1613
      
1614
      if (chr == ' ')
1615
        in_tag= true;
1616
      else 
1617
        in_tag= false;
1618
      break;
1619
      
1620
    case ' ': /* read attribute */
1621
      while(chr == ' ')  /* skip blanks */
1622
        chr= my_tospace(GET);
1623
      
1624
      if(!in_tag)
1625
        break;
1626
      
1627
      while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF)
1628
      {
1629
        attribute.append(chr);
1630
        chr= my_tospace(GET);
1631
      }
1632
      break;
1633
      
1634
    case '>': /* end tag - read tag value */
1635
      in_tag= false;
1636
      chr= read_value('<', &value);
1637
      if(chr == my_b_EOF)
1638
        goto found_eof;
1639
      
1640
      /* save value to list */
1641
      if(tag.length() > 0 && value.length() > 0)
1642
        taglist.push_front( new XML_TAG(level, tag, value));
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1643
1 by brian
clean slate
1644
      tag.length(0);
1645
      value.length(0);
1646
      attribute.length(0);
1647
      break;
1648
      
1649
    case '/': /* close tag */
1650
      level--;
1651
      chr= my_tospace(GET);
1652
      if(chr != '>')   /* if this is an empty tag <tag   /> */
1653
        tag.length(0); /* we should keep tag value          */
1654
      while(chr != '>' && chr != my_b_EOF)
1655
      {
1656
        tag.append(chr);
1657
        chr= my_tospace(GET);
1658
      }
1659
      
1660
      if((tag.length() == line_term_length -2) &&
1661
         (strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1662
         return(0); //normal return
1663
1 by brian
clean slate
1664
      chr= my_tospace(GET);
1665
      break;   
1666
      
1667
    case '=': /* attribute name end - read the value */
1668
      //check for tag field and attribute name
1669
      if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
1670
         !memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name")))
1671
      {
1672
        /*
1673
          this is format <field name="xx">xx</field>
1674
          where actual fieldname is in attribute
1675
        */
1676
        delim= my_tospace(GET);
1677
        tag.length(0);
1678
        attribute.length(0);
1679
        chr= '<'; /* we pretend that it is a tag */
1680
        level--;
1681
        break;
1682
      }
1683
      
1684
      //check for " or '
1685
      chr= GET;
1686
      if (chr == my_b_EOF)
1687
        goto found_eof;
1688
      if(chr == '"' || chr == '\'')
1689
      {
1690
        delim= chr;
1691
      }
1692
      else
1693
      {
1694
        delim= ' '; /* no delimiter, use space */
1695
        PUSH(chr);
1696
      }
1697
      
1698
      chr= read_value(delim, &value);
1699
      if(attribute.length() > 0 && value.length() > 0)
1700
        taglist.push_front(new XML_TAG(level + 1, attribute, value));
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1701
1 by brian
clean slate
1702
      attribute.length(0);
1703
      value.length(0);
1704
      if (chr != ' ')
1705
        chr= my_tospace(GET);
1706
      break;
1707
    
1708
    default:
1709
      chr= my_tospace(GET);
1710
    } /* end switch */
1711
  } /* end while */
1712
  
1713
found_eof:
1714
  eof= 1;
51.1.1 by Jay Pipes
Merged PatG's removal of various DBUG stuff with still keeping DBUG_ASSERT calls since they seem to be breaking test runs
1715
  return(1);
1 by brian
clean slate
1716
}