1
/* Copyright (C) 2000-2006 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
17
/* Copy data from a text file to a table */
18
/* 2006-12 Erik Wetterberg : LOAD XML added */
20
#include "mysql_priv.h"
30
XML_TAG(int l, String f, String v);
34
XML_TAG::XML_TAG(int l, String f, String v)
44
uchar *buffer, /* Buffer for read text */
45
*end_of_buff; /* Data in buffer ends here */
46
uint buff_length, /* Length of buffer */
47
max_length; /* Max length of row */
48
char *field_term_ptr,*line_term_ptr,*line_start_ptr,*line_start_end;
49
uint field_term_length,line_term_length,enclosed_length;
50
int field_term_char,line_term_char,enclosed_char,escape_char;
51
int *stack,*stack_pos;
52
bool found_end_of_line,start_of_line,eof;
53
bool need_end_io_cache;
56
int level; /* for load xml */
59
bool error,line_cuted,found_null,enclosed;
60
uchar *row_start, /* Found row starts here */
61
*row_end; /* Found row ends here */
62
CHARSET_INFO *read_charset;
64
READ_INFO(File file,uint tot_length,CHARSET_INFO *cs,
65
String &field_term,String &line_start,String &line_term,
66
String &enclosed,int escape,bool get_it_from_net, bool is_fifo);
69
int read_fixed_length(void);
71
char unescape(char chr);
72
int terminator(char *ptr,uint length);
73
bool find_start_of_fields();
75
List<XML_TAG> taglist;
76
int read_value(int delim, String *val);
78
int clear_level(int level);
81
We need to force cache close before destructor is invoked to log
86
::end_io_cache(&cache);
87
need_end_io_cache = 0;
91
Either this method, or we need to make cache public
92
Arg must be set from mysql_load() since constructor does not see
93
either the table or THD value
95
/*
  Store a caller-supplied pointer in the 'arg' member of the IO cache.
  mysql_load() calls this with the address of its LOAD_FILE_INFO when the
  binary log is open.
*/
void set_io_cache_arg(void* arg)
{
  cache.arg = arg;
}
98
static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
99
List<Item> &fields_vars, List<Item> &set_fields,
100
List<Item> &set_values, READ_INFO &read_info,
102
bool ignore_check_option_errors);
103
static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
104
List<Item> &fields_vars, List<Item> &set_fields,
105
List<Item> &set_values, READ_INFO &read_info,
106
String &enclosed, ulong skip_lines,
107
bool ignore_check_option_errors);
109
static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
110
List<Item> &fields_vars, List<Item> &set_fields,
111
List<Item> &set_values, READ_INFO &read_info,
112
String &enclosed, ulong skip_lines,
113
bool ignore_check_option_errors);
115
static bool write_execute_load_query_log_event(THD *thd,
116
bool duplicates, bool ignore,
117
bool transactional_table,
118
THD::killed_state killed_status);
121
Execute LOAD DATA query
126
ex - sql_exchange object representing source file and its parsing rules
127
table_list - list of tables to which we are loading data
128
fields_vars - list of fields and variables to which we read
130
set_fields - list of fields mentioned in set clause
131
set_values - expressions to assign to fields in previous list
132
handle_duplicates - indicates whether we should emit an error or
133
replace row if we will meet duplicates.
134
ignore - indicates whether we should ignore duplicates
135
read_file_from_client - is this LOAD DATA LOCAL ?
138
TRUE - error / FALSE - success
141
int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
142
List<Item> &fields_vars, List<Item> &set_fields,
143
List<Item> &set_values,
144
enum enum_duplicates handle_duplicates, bool ignore,
145
bool read_file_from_client)
147
char name[FN_REFLEN];
151
String *field_term=ex->field_term,*escaped=ex->escaped;
152
String *enclosed=ex->enclosed;
154
LOAD_FILE_INFO lf_info;
155
char *db = table_list->db; // This is never null
157
If path for file is not defined, we will use the current database.
158
If this is not set, we will use the directory where the table to be
161
char *tdb= thd->db ? thd->db : db; // Result is never null
162
ulong skip_lines= ex->skip_lines;
163
bool transactional_table;
164
THD::killed_state killed_status= THD::NOT_KILLED;
165
DBUG_ENTER("mysql_load");
167
if (escaped->length() > 1 || enclosed->length() > 1)
169
my_message(ER_WRONG_FIELD_TERMINATORS,ER(ER_WRONG_FIELD_TERMINATORS),
173
if (open_and_lock_tables(thd, table_list))
175
if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
176
&thd->lex->select_lex.top_join_list,
178
&thd->lex->select_lex.leaf_tables, true))
182
Let us emit an error if we are loading data to table which is used
183
in subselect in SET clause like we do it for INSERT.
185
The main thing to fix to remove this restriction is to ensure that the
186
table is marked to be 'used for insert' in which case we should never
187
mark this table as 'const table' (ie, one that has only one row).
189
if (unique_table(thd, table_list, table_list->next_global, 0))
191
my_error(ER_UPDATE_TABLE_USED, MYF(0), table_list->table_name);
195
table= table_list->table;
196
transactional_table= table->file->has_transactions();
198
if (!fields_vars.elements)
201
for (field=table->field; *field ; field++)
202
fields_vars.push_back(new Item_field(*field));
203
bitmap_set_all(table->write_set);
204
table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
206
Let us also prepare SET clause, although it is probably empty
209
if (setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
210
setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0))
215
/* TODO: use this conds for 'WITH CHECK OPTIONS' */
216
if (setup_fields(thd, 0, fields_vars, MARK_COLUMNS_WRITE, 0, 0) ||
217
setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
218
check_that_all_fields_are_given_values(thd, table, table_list))
221
Check whether a TIMESTAMP field with auto-set feature is specified
224
if (table->timestamp_field)
226
if (bitmap_is_set(table->write_set,
227
table->timestamp_field->field_index))
228
table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
231
bitmap_set_bit(table->write_set,
232
table->timestamp_field->field_index);
235
/* Fix the expressions in SET clause */
236
if (setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0))
240
table->mark_columns_needed_for_insert();
243
bool use_blobs= 0, use_vars= 0;
244
List_iterator_fast<Item> it(fields_vars);
249
Item *real_item= item->real_item();
251
if (real_item->type() == Item::FIELD_ITEM)
253
Field *field= ((Item_field*)real_item)->field;
254
if (field->flags & BLOB_FLAG)
257
tot_length+= 256; // Will be extended if needed
260
tot_length+= field->field_length;
262
else if (item->type() == Item::STRING_ITEM)
265
if (use_blobs && !ex->line_term->length() && !field_term->length())
267
my_message(ER_BLOBS_AND_NO_TERMINATED,ER(ER_BLOBS_AND_NO_TERMINATED),
271
if (use_vars && !field_term->length() && !enclosed->length())
273
my_error(ER_LOAD_FROM_FIXED_SIZE_ROWS_TO_VAR, MYF(0));
277
/* We can't give an error in the middle when using LOCAL files */
278
if (read_file_from_client && handle_duplicates == DUP_ERROR)
281
if (read_file_from_client)
283
(void)net_request_file(&thd->net,ex->file_name);
288
#ifdef DONT_ALLOW_FULL_LOAD_DATA_PATHS
289
ex->file_name+=dirname_length(ex->file_name);
291
if (!dirname_length(ex->file_name))
293
strxnmov(name, FN_REFLEN-1, mysql_real_data_home, tdb, NullS);
294
(void) fn_format(name, ex->file_name, name, "",
295
MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
299
(void) fn_format(name, ex->file_name, mysql_real_data_home, "",
300
MY_RELATIVE_PATH | MY_UNPACK_FILENAME);
302
if (opt_secure_file_priv &&
303
strncmp(opt_secure_file_priv, name, strlen(opt_secure_file_priv)))
305
/* Read only allowed from within dir specified by secure_file_priv */
306
my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--secure-file-priv");
311
if (!my_stat(name,&stat_info,MYF(MY_WME)))
314
// if we are not in slave thread, the file must be:
315
if (!thd->slave_thread &&
316
!((stat_info.st_mode & S_IROTH) == S_IROTH && // readable by others
317
(stat_info.st_mode & S_IFLNK) != S_IFLNK && // and not a symlink
318
((stat_info.st_mode & S_IFREG) == S_IFREG ||
319
(stat_info.st_mode & S_IFIFO) == S_IFIFO)))
321
my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), name);
324
if ((stat_info.st_mode & S_IFIFO) == S_IFIFO)
327
if ((file=my_open(name,O_RDONLY,MYF(MY_WME))) < 0)
332
bzero((char*) &info,sizeof(info));
334
info.handle_duplicates=handle_duplicates;
335
info.escape_char=escaped->length() ? (*escaped)[0] : INT_MAX;
337
READ_INFO read_info(file,tot_length,
338
ex->cs ? ex->cs : thd->variables.collation_database,
339
*field_term,*ex->line_start, *ex->line_term, *enclosed,
340
info.escape_char, read_file_from_client, is_fifo);
344
my_close(file,MYF(0)); // no files in net reading
345
DBUG_RETURN(TRUE); // Can't allocate buffers
348
if (mysql_bin_log.is_open())
351
lf_info.wrote_create_file = 0;
352
lf_info.last_pos_in_file = HA_POS_ERROR;
353
lf_info.log_delayed= transactional_table;
354
read_info.set_io_cache_arg((void*) &lf_info);
357
thd->count_cuted_fields= CHECK_FIELD_WARN; /* calc cuted fields */
358
thd->cuted_fields=0L;
359
/* Skip lines if there is a line terminator */
360
if (ex->line_term->length() && ex->filetype != FILETYPE_XML)
362
/* ex->skip_lines needs to be preserved for logging */
363
while (skip_lines > 0)
366
if (read_info.next_line())
371
if (!(error=test(read_info.error)))
374
table->next_number_field=table->found_next_number_field;
376
handle_duplicates == DUP_REPLACE)
377
table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
378
if (handle_duplicates == DUP_REPLACE)
379
table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE);
380
table->file->ha_start_bulk_insert((ha_rows) 0);
383
thd->abort_on_warning= (!ignore &&
384
(thd->variables.sql_mode &
385
(MODE_STRICT_TRANS_TABLES |
386
MODE_STRICT_ALL_TABLES)));
388
if (ex->filetype == FILETYPE_XML) /* load xml */
389
error= read_xml_field(thd, info, table_list, fields_vars,
390
set_fields, set_values, read_info,
391
*(ex->line_term), skip_lines, ignore);
392
else if (!field_term->length() && !enclosed->length())
393
error= read_fixed_length(thd, info, table_list, fields_vars,
394
set_fields, set_values, read_info,
397
error= read_sep_field(thd, info, table_list, fields_vars,
398
set_fields, set_values, read_info,
399
*enclosed, skip_lines, ignore);
400
if (table->file->ha_end_bulk_insert() && !error)
402
table->file->print_error(my_errno, MYF(0));
405
table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
406
table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
407
table->next_number_field=0;
410
my_close(file,MYF(0));
411
free_blobs(table); /* if pack_blob was used */
413
thd->count_cuted_fields= CHECK_FIELD_IGNORE;
415
simulated killing in the middle of per-row loop
416
must be effective for binlogging
418
DBUG_EXECUTE_IF("simulate_kill_bug27571",
421
thd->killed= THD::KILL_QUERY;
423
killed_status= (error == 0)? THD::NOT_KILLED : thd->killed;
426
if (read_file_from_client)
427
while (!read_info.next_line())
430
if (mysql_bin_log.is_open())
434
Make sure last block (the one which caused the error) gets
435
logged. This is needed because otherwise after write of (to
436
the binlog, not to read_info (which is a cache))
437
Delete_file_log_event the bad block will remain in read_info
438
(because pre_read is not called at the end of the last
439
block; remember pre_read is called whenever a new block is
440
read from disk). At the end of mysql_load(), the destructor
441
of read_info will call end_io_cache() which will flush
442
read_info, so we will finally have this in the binlog:
444
Append_block # The last successful block
446
Append_block # The failing block
448
Or could also be (for a small file)
449
Create_file # The failing block
450
which is nonsense (Delete_file is not written in this case, because:
451
Create_file has not been written, so Delete_file is not written, then
452
when read_info is destroyed end_io_cache() is called which writes
455
read_info.end_io_cache();
456
/* If the file was not empty, wrote_create_file is true */
457
if (lf_info.wrote_create_file)
459
if (thd->transaction.stmt.modified_non_trans_table)
460
write_execute_load_query_log_event(thd, handle_duplicates,
461
ignore, transactional_table,
465
Delete_file_log_event d(thd, db, transactional_table);
466
d.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
467
mysql_bin_log.write(&d);
472
error= -1; // Error on read
475
sprintf(name, ER(ER_LOAD_INFO), (ulong) info.records, (ulong) info.deleted,
476
(ulong) (info.records - info.copied), (ulong) thd->cuted_fields);
478
if (thd->transaction.stmt.modified_non_trans_table)
479
thd->transaction.all.modified_non_trans_table= TRUE;
481
if (mysql_bin_log.is_open())
484
We need to do the job that is normally done inside
485
binlog_query() here, which is to ensure that the pending event
486
is written before tables are unlocked and before any other
487
events are written. We also need to update the table map
488
version for the binary log to mark that table maps are invalid
491
if (thd->current_stmt_binlog_row_based)
492
thd->binlog_flush_pending_rows_event(true);
496
As already explained above, we need to call end_io_cache() or the last
497
block will be logged only after Execute_load_query_log_event (which is
498
wrong), when read_info is destroyed.
500
read_info.end_io_cache();
501
if (lf_info.wrote_create_file)
503
write_execute_load_query_log_event(thd, handle_duplicates, ignore,
504
transactional_table,killed_status);
509
/* ok to client sent only after binlog write and engine commit */
510
my_ok(thd, info.copied + info.deleted, 0L, name);
512
DBUG_ASSERT(transactional_table || !(info.copied || info.deleted) ||
513
thd->transaction.stmt.modified_non_trans_table);
514
table->file->ha_release_auto_increment();
515
table->auto_increment_field_not_null= FALSE;
516
thd->abort_on_warning= 0;
521
/* Not a very useful function; just to avoid duplication of code */
522
static bool write_execute_load_query_log_event(THD *thd,
523
bool duplicates, bool ignore,
524
bool transactional_table,
525
THD::killed_state killed_err_arg)
527
Execute_load_query_log_event
528
e(thd, thd->query, thd->query_length,
529
(char*)thd->lex->fname_start - (char*)thd->query,
530
(char*)thd->lex->fname_end - (char*)thd->query,
531
(duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE :
532
(ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR),
533
transactional_table, FALSE, killed_err_arg);
534
e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
535
return mysql_bin_log.write(&e);
539
/****************************************************************************
540
** Read rows of fixed size + optional garbage + optional newline
541
****************************************************************************/
544
read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
545
List<Item> &fields_vars, List<Item> &set_fields,
546
List<Item> &set_values, READ_INFO &read_info,
547
ulong skip_lines, bool ignore_check_option_errors)
549
List_iterator_fast<Item> it(fields_vars);
550
Item_field *sql_field;
551
TABLE *table= table_list->table;
554
DBUG_ENTER("read_fixed_length");
558
while (!read_info.read_fixed_length())
562
thd->send_kill_message();
568
We could implement this with a simple seek if:
569
- We are not using DATA INFILE LOCAL
570
- escape character is ""
571
- line starting prefix is ""
577
uchar *pos=read_info.row_start;
579
read_info.row_end[0]=0;
582
restore_record(table, s->default_values);
584
There are no variables in the fields_vars list in this format, so
585
this conversion is safe.
587
while ((sql_field= (Item_field*) it++))
589
Field *field= sql_field->field;
590
if (field == table->next_number_field)
591
table->auto_increment_field_not_null= TRUE;
593
No fields specified in fields_vars list can be null in this format.
594
Mark field as not null, we should do this for each row because of
597
field->set_notnull();
599
if (pos == read_info.row_end)
601
thd->cuted_fields++; /* Not enough fields */
602
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
603
ER_WARN_TOO_FEW_RECORDS,
604
ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
605
if (!field->maybe_null() && field->type() == FIELD_TYPE_TIMESTAMP)
606
((Field_timestamp*) field)->set_time();
612
if ((length=(uint) (read_info.row_end-pos)) >
614
length=field->field_length;
615
save_chr=pos[length]; pos[length]='\0'; // Safeguard aganst malloc
616
field->store((char*) pos,length,read_info.read_charset);
617
pos[length]=save_chr;
618
if ((pos+=length) > read_info.row_end)
619
pos= read_info.row_end; /* Fills rest with space */
622
if (pos != read_info.row_end)
624
thd->cuted_fields++; /* To long row */
625
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
626
ER_WARN_TOO_MANY_RECORDS,
627
ER(ER_WARN_TOO_MANY_RECORDS), thd->row_count);
631
fill_record(thd, set_fields, set_values,
632
ignore_check_option_errors))
635
err= write_record(thd, table, &info);
636
table->auto_increment_field_not_null= FALSE;
641
We don't need to reset auto-increment field since we are restoring
642
its default value at the beginning of each loop iteration.
644
if (read_info.next_line()) // Skip to next line
646
if (read_info.line_cuted)
648
thd->cuted_fields++; /* To long row */
649
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
650
ER_WARN_TOO_MANY_RECORDS,
651
ER(ER_WARN_TOO_MANY_RECORDS), thd->row_count);
655
DBUG_RETURN(test(read_info.error));
661
read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
662
List<Item> &fields_vars, List<Item> &set_fields,
663
List<Item> &set_values, READ_INFO &read_info,
664
String &enclosed, ulong skip_lines,
665
bool ignore_check_option_errors)
667
List_iterator_fast<Item> it(fields_vars);
669
TABLE *table= table_list->table;
670
uint enclosed_length;
673
DBUG_ENTER("read_sep_field");
675
enclosed_length=enclosed.length();
682
thd->send_kill_message();
686
restore_record(table, s->default_values);
694
if (read_info.read_field())
697
/* If this line is to be skipped we don't want to fill field or var */
701
pos=read_info.row_start;
702
length=(uint) (read_info.row_end-pos);
704
real_item= item->real_item();
706
if ((!read_info.enclosed && (enclosed_length && length == 4 && !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
707
(length == 1 && read_info.found_null))
710
if (real_item->type() == Item::FIELD_ITEM)
712
Field *field= ((Item_field *)real_item)->field;
715
my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0), field->field_name,
720
if (!field->maybe_null())
722
if (field->type() == MYSQL_TYPE_TIMESTAMP)
723
((Field_timestamp*) field)->set_time();
724
else if (field != table->next_number_field)
725
field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
726
ER_WARN_NULL_TO_NOTNULL, 1);
729
else if (item->type() == Item::STRING_ITEM)
731
((Item_user_var_as_out_param *)item)->set_null_value(
732
read_info.read_charset);
736
my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
743
if (real_item->type() == Item::FIELD_ITEM)
745
Field *field= ((Item_field *)real_item)->field;
746
field->set_notnull();
747
read_info.row_end[0]=0; // Safe to change end marker
748
if (field == table->next_number_field)
749
table->auto_increment_field_not_null= TRUE;
750
field->store((char*) pos, length, read_info.read_charset);
752
else if (item->type() == Item::STRING_ITEM)
754
((Item_user_var_as_out_param *)item)->set_value((char*) pos, length,
755
read_info.read_charset);
759
my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
772
/* Have not read any field, thus input file is simply ended */
773
if (item == fields_vars.head())
775
for (; item ; item= it++)
777
Item *real_item= item->real_item();
778
if (real_item->type() == Item::FIELD_ITEM)
780
Field *field= ((Item_field *)real_item)->field;
783
my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0),field->field_name,
787
if (!field->maybe_null() && field->type() == FIELD_TYPE_TIMESTAMP)
788
((Field_timestamp*) field)->set_time();
790
QQ: We probably should not throw warning for each field.
791
But how about intention to always have the same number
792
of warnings in THD::cuted_fields (and get rid of cuted_fields
796
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
797
ER_WARN_TOO_FEW_RECORDS,
798
ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
800
else if (item->type() == Item::STRING_ITEM)
802
((Item_user_var_as_out_param *)item)->set_null_value(
803
read_info.read_charset);
807
my_error(ER_LOAD_DATA_INVALID_COLUMN, MYF(0), item->full_name());
814
fill_record(thd, set_fields, set_values,
815
ignore_check_option_errors))
818
err= write_record(thd, table, &info);
819
table->auto_increment_field_not_null= FALSE;
823
We don't need to reset auto-increment field since we are restoring
824
its default value at the beginning of each loop iteration.
826
if (read_info.next_line()) // Skip to next line
828
if (read_info.line_cuted)
830
thd->cuted_fields++; /* To long row */
831
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
832
ER_WARN_TOO_MANY_RECORDS, ER(ER_WARN_TOO_MANY_RECORDS),
839
DBUG_RETURN(test(read_info.error));
843
/****************************************************************************
844
** Read rows in xml format
845
****************************************************************************/
847
read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
848
List<Item> &fields_vars, List<Item> &set_fields,
849
List<Item> &set_values, READ_INFO &read_info,
850
String &row_tag, ulong skip_lines,
851
bool ignore_check_option_errors)
853
List_iterator_fast<Item> it(fields_vars);
855
TABLE *table= table_list->table;
856
bool no_trans_update_stmt;
857
CHARSET_INFO *cs= read_info.read_charset;
858
DBUG_ENTER("read_xml_field");
860
no_trans_update_stmt= !table->file->has_transactions();
862
for ( ; ; it.rewind())
866
thd->send_kill_message();
870
// read row tag and save values into tag list
871
if (read_info.read_xml())
874
List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
879
DBUG_PRINT("read_xml_field", ("skip_lines=%d", (int) skip_lines));
880
while ((tag= xmlit++))
882
DBUG_PRINT("read_xml_field", ("got tag:%i '%s' '%s'",
883
tag->level, tag->field.c_ptr(),
884
tag->value.c_ptr()));
888
restore_record(table, s->default_values);
892
/* If this line is to be skipped we don't want to fill field or var */
896
/* find field in tag list */
900
while(tag && strcmp(tag->field.c_ptr(), item->name) != 0)
903
if (!tag) // found null
905
if (item->type() == Item::FIELD_ITEM)
907
Field *field= ((Item_field *) item)->field;
910
if (field == table->next_number_field)
911
table->auto_increment_field_not_null= TRUE;
912
if (!field->maybe_null())
914
if (field->type() == FIELD_TYPE_TIMESTAMP)
915
((Field_timestamp *) field)->set_time();
916
else if (field != table->next_number_field)
917
field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
918
ER_WARN_NULL_TO_NOTNULL, 1);
922
((Item_user_var_as_out_param *) item)->set_null_value(cs);
926
if (item->type() == Item::FIELD_ITEM)
929
Field *field= ((Item_field *)item)->field;
930
field->set_notnull();
931
if (field == table->next_number_field)
932
table->auto_increment_field_not_null= TRUE;
933
field->store((char *) tag->value.ptr(), tag->value.length(), cs);
936
((Item_user_var_as_out_param *) item)->set_value(
937
(char *) tag->value.ptr(),
938
tag->value.length(), cs);
952
/* Have not read any field, thus input file is simply ended */
953
if (item == fields_vars.head())
956
for ( ; item; item= it++)
958
if (item->type() == Item::FIELD_ITEM)
961
QQ: We probably should not throw warning for each field.
962
But how about intention to always have the same number
963
of warnings in THD::cuted_fields (and get rid of cuted_fields
967
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
968
ER_WARN_TOO_FEW_RECORDS,
969
ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
972
((Item_user_var_as_out_param *)item)->set_null_value(cs);
976
if (thd->killed || fill_record(thd, set_fields, set_values,
977
ignore_check_option_errors))
980
if (write_record(thd, table, &info))
984
We don't need to reset auto-increment field since we are restoring
985
its default value at the beginning of each loop iteration.
987
thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt;
990
DBUG_RETURN(test(read_info.error));
994
/* Unescape all escape characters, mark \N as null */
997
READ_INFO::unescape(char chr)
999
/* keep this switch synchronized with the ESCAPE_CHARS macro */
1001
case 'n': return '\n';
1002
case 't': return '\t';
1003
case 'r': return '\r';
1004
case 'b': return '\b';
1005
case '0': return 0; // Ascii null
1006
case 'Z': return '\032'; // Win32 end of file
1007
case 'N': found_null=1;
1010
default: return chr;
1016
Read a line using buffering
1017
If last line is empty (in line mode) then it isn't output
1021
READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
1022
String &field_term, String &line_start, String &line_term,
1023
String &enclosed_par, int escape, bool get_it_from_net,
1025
:file(file_par),escape_char(escape)
1028
field_term_ptr=(char*) field_term.ptr();
1029
field_term_length= field_term.length();
1030
line_term_ptr=(char*) line_term.ptr();
1031
line_term_length= line_term.length();
1032
level= 0; /* for load xml */
1033
if (line_start.length() == 0)
1040
line_start_ptr=(char*) line_start.ptr();
1041
line_start_end=line_start_ptr+line_start.length();
1044
/* If field_terminator == line_terminator, don't use line_terminator */
1045
if (field_term_length == line_term_length &&
1046
!memcmp(field_term_ptr,line_term_ptr,field_term_length))
1049
line_term_ptr=(char*) "";
1051
enclosed_char= (enclosed_length=enclosed_par.length()) ?
1052
(uchar) enclosed_par[0] : INT_MAX;
1053
field_term_char= field_term_length ? (uchar) field_term_ptr[0] : INT_MAX;
1054
line_term_char= line_term_length ? (uchar) line_term_ptr[0] : INT_MAX;
1055
error=eof=found_end_of_line=found_null=line_cuted=0;
1056
buff_length=tot_length;
1059
/* Set up a stack for unget in case of long terminators */
1060
uint length=max(field_term_length,line_term_length)+1;
1061
set_if_bigger(length,line_start.length());
1062
stack=stack_pos=(int*) sql_alloc(sizeof(int)*length);
1064
if (!(buffer=(uchar*) my_malloc(buff_length+1,MYF(0))))
1065
error=1; /* purecov: inspected */
1068
end_of_buff=buffer+buff_length;
1069
if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0,
1070
(get_it_from_net) ? READ_NET :
1071
(is_fifo ? READ_FIFO : READ_CACHE),0L,1,
1074
my_free((uchar*) buffer,MYF(0)); /* purecov: inspected */
1080
init_io_cache() will not initialize read_function member
1081
if the cache is READ_NET. So we work around the problem with a
1084
need_end_io_cache = 1;
1086
if (get_it_from_net)
1087
cache.read_function = _my_b_net_read;
1089
if (mysql_bin_log.is_open())
1090
cache.pre_read = cache.pre_close =
1091
(IO_CACHE_CALLBACK) log_loaded_block;
1097
READ_INFO::~READ_INFO()
1101
if (need_end_io_cache)
1102
::end_io_cache(&cache);
1103
my_free((uchar*) buffer,MYF(0));
1106
List_iterator<XML_TAG> xmlit(taglist);
1108
while ((t= xmlit++))
1113
/*
  Fetch the next input character: pop the most recently pushed value from
  the unget stack if it is not empty, otherwise read from the IO cache.
*/
#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache))
1114
/* Push a value onto the unget stack; the next GET returns it. */
#define PUSH(A) *(stack_pos++)=(A)
1117
inline int READ_INFO::terminator(char *ptr,uint length)
1119
int chr=0; // Keep gcc happy
1121
for (i=1 ; i < length ; i++)
1123
if ((chr=GET) != *++ptr)
1132
PUSH((uchar) *--ptr);
1137
int READ_INFO::read_field()
1139
int chr,found_enclosed_char;
1140
uchar *to,*new_buffer;
1143
if (found_end_of_line)
1144
return 1; // One have to call next_line
1146
/* Skip until we find 'line_start' */
1149
{ // Skip until line_start
1151
if (find_start_of_fields())
1154
if ((chr=GET) == my_b_EOF)
1156
found_end_of_line=eof=1;
1160
if (chr == enclosed_char)
1162
found_enclosed_char=enclosed_char;
1163
*to++=(uchar) chr; // If error
1167
found_enclosed_char= INT_MAX;
1173
while ( to < end_of_buff)
1177
if ((my_mbcharlen(read_charset, chr) > 1) &&
1178
to+my_mbcharlen(read_charset, chr) <= end_of_buff)
1180
uchar* p = (uchar*)to;
1182
int ml = my_mbcharlen(read_charset, chr);
1184
for (i=1; i<ml; i++) {
1186
if (chr == my_b_EOF)
1190
if (my_ismbchar(read_charset,
1194
for (i=0; i<ml; i++)
1195
PUSH((uchar) *--to);
1199
if (chr == my_b_EOF)
1201
if (chr == escape_char)
1203
if ((chr=GET) == my_b_EOF)
1205
*to++= (uchar) escape_char;
1209
When escape_char == enclosed_char, we treat it like we do for
1210
handling quotes in SQL parsing -- you can double-up the
1211
escape_char to include it literally, but it doesn't do escapes
1212
like \n. This allows: LOAD DATA ... ENCLOSED BY '"' ESCAPED BY '"'
1213
with data like: "fie""ld1", "field2"
1215
if (escape_char != enclosed_char || chr == escape_char)
1217
*to++ = (uchar) unescape((char) chr);
1223
#ifdef ALLOW_LINESEPARATOR_IN_STRINGS
1224
if (chr == line_term_char)
1226
if (chr == line_term_char && found_enclosed_char == INT_MAX)
1229
if (terminator(line_term_ptr,line_term_length))
1230
{ // Maybe unexpected linefeed
1232
found_end_of_line=1;
1238
if (chr == found_enclosed_char)
1240
if ((chr=GET) == found_enclosed_char)
1241
{ // Remove dupplicated
1242
*to++ = (uchar) chr;
1245
// End of enclosed field if followed by field_term or line_term
1246
if (chr == my_b_EOF ||
1247
(chr == line_term_char && terminator(line_term_ptr, line_term_length)))
1248
{ // Maybe unexpected linefeed
1250
found_end_of_line=1;
1255
if (chr == field_term_char &&
1256
terminator(field_term_ptr,field_term_length))
1264
The string didn't terminate yet.
1265
Store back next character for the loop
1268
/* copy the found term character to 'to' */
1269
chr= found_enclosed_char;
1271
else if (chr == field_term_char && found_enclosed_char == INT_MAX)
1273
if (terminator(field_term_ptr,field_term_length))
1281
*to++ = (uchar) chr;
1284
** We come here if buffer is too small. Enlarge it and continue
1286
if (!(new_buffer=(uchar*) my_realloc((char*) buffer,buff_length+1+IO_SIZE,
1289
to=new_buffer + (to-buffer);
1291
buff_length+=IO_SIZE;
1292
end_of_buff=buffer+buff_length;
1297
found_end_of_line=eof=1;
1304
Read a row with fixed length.
1307
The row may not be fixed size on disk if there are escape
1308
characters in the file.
1311
One can't use fixed length with multi-byte charset **
1318
int READ_INFO::read_fixed_length()
1322
if (found_end_of_line)
1323
return 1; // One have to call next_line
1326
{ // Skip until line_start
1328
if (find_start_of_fields())
1332
to=row_start=buffer;
1333
while (to < end_of_buff)
1335
if ((chr=GET) == my_b_EOF)
1337
if (chr == escape_char)
1339
if ((chr=GET) == my_b_EOF)
1341
*to++= (uchar) escape_char;
1344
*to++ =(uchar) unescape((char) chr);
1347
if (chr == line_term_char)
1349
if (terminator(line_term_ptr,line_term_length))
1350
{ // Maybe unexpected linefeed
1351
found_end_of_line=1;
1356
*to++ = (uchar) chr;
1358
row_end=to; // Found full line
1362
found_end_of_line=eof=1;
1365
return to == buffer ? 1 : 0;
1369
int READ_INFO::next_line()
1372
start_of_line= line_start_ptr != 0;
1373
if (found_end_of_line || eof)
1375
found_end_of_line=0;
1378
found_end_of_line=0;
1379
if (!line_term_length)
1380
return 0; // No lines
1385
if (my_mbcharlen(read_charset, chr) > 1)
1388
chr != my_b_EOF && i<my_mbcharlen(read_charset, chr);
1391
if (chr == escape_char)
1395
if (chr == my_b_EOF)
1400
if (chr == escape_char)
1403
if (GET == my_b_EOF)
1407
if (chr == line_term_char && terminator(line_term_ptr,line_term_length))
1414
bool READ_INFO::find_start_of_fields()
1420
if ((chr=GET) == my_b_EOF)
1422
found_end_of_line=eof=1;
1425
} while ((char) chr != line_start_ptr[0]);
1426
for (char *ptr=line_start_ptr+1 ; ptr != line_start_end ; ptr++)
1428
chr=GET; // Eof will be checked later
1429
if ((char) chr != *ptr)
1430
{ // Can't be line_start
1432
while (--ptr != line_start_ptr)
1433
{ // Restart with next char
1444
Clear taglist from tags with a specified level
1446
/*
  Walk taglist and select every tag whose level is greater than or equal
  to the given level.

  @param level  lowest nesting level that is selected

  NOTE(review): documented from a partial view of this function; what is
  done with a selected tag and what is returned should be confirmed
  against the full body.
*/
int READ_INFO::clear_level(int level)
1448
DBUG_ENTER("READ_INFO::read_xml clear_level");
1449
List_iterator<XML_TAG> xmlit(taglist);
1453
// Visit every entry of the tag list
while ((tag= xmlit++))
1455
// Deeper levels are selected as well, not only an exact match
if(tag->level >= level)
1466
Convert an XML entity to Unicode value.
1470
/*
  Recognise the names of the predefined XML entities gt, lt, amp, quot
  and apos.

  @param name    entity name; compared with memcmp() over length bytes
  @param length  number of bytes in name; the visible branches for amp,
                 quot and apos are selected by length 3 and 4

  NOTE(review): documented from a partial view of this function; the
  returned values are not visible here.  read_value() treats a result
  >= 0 as a successful translation.
*/
my_xml_entity_to_char(const char *name, uint length)
1474
if (!memcmp(name, "gt", length))
1476
if (!memcmp(name, "lt", length))
1479
else if (length == 3)
1481
if (!memcmp(name, "amp", length))
1484
else if (length == 4)
1486
if (!memcmp(name, "quot", length))
1488
if (!memcmp(name, "apos", length))
1496
@brief Convert carriage return, line feed, tab to space
1498
@param chr character
1500
@details According to the "XML 1.0" standard,
1501
only space (#x20) characters, carriage returns,
1502
line feeds or tabs are considered as spaces.
1503
Convert all of them to space (#x20) for parsing simplicity.
1508
// Tab, carriage return and line feed become a plain space; any other value is returned unchanged
return (chr == '\t' || chr == '\r' || chr == '\n') ? ' ' : chr;
1513
Read an xml value: handle multibyte and xml escape
1515
/*
  Read input characters until delim or end of input is reached.

  @param delim  character that ends the value
  @param val    output string (read_xml() inspects its length afterwards)

  Characters fetched by the main loop are passed through my_tospace().
  A lead byte for which my_mbcharlen() reports more than one byte gets
  its own branch.  The loop that runs up to the semicolon feeds
  my_xml_entity_to_char(), so it presumably decodes entity references.

  NOTE(review): documented from a partial view of this function; confirm
  the details, including the return value, against the full body.
*/
int READ_INFO::read_value(int delim, String *val)
1520
for (chr= my_tospace(GET); chr != delim && chr != my_b_EOF; )
1523
// Multi-byte lead byte: ml below is the total byte count of the character
if (my_mbcharlen(read_charset, chr) > 1)
1525
DBUG_PRINT("read_xml",("multi byte"));
1526
int i, ml= my_mbcharlen(read_charset, chr);
1527
for (i= 1; i < ml; i++)
1531
Don't use my_tospace() in the middle of a multi-byte character
1532
TODO: check that the multi-byte sequence is valid.
1535
if (chr == my_b_EOF)
1543
for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET))
1545
if (chr == my_b_EOF)
1549
if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
1560
chr= my_tospace(GET);
1567
Read a record in xml format
1568
tags and attributes are stored in taglist
1569
when the tag set in ROWS IDENTIFIED BY is closed, the record is complete and we return
1571
/*
  Read one record in XML format.

  The input is processed character by character (each one normalised by
  my_tospace()) and the case labels below dispatch on the current
  character.  Tag values are pushed onto taglist as XML_TAG entries at the
  current level, attribute values at level + 1.  0 is returned when a
  closing tag matches the row tag held in line_term.

  NOTE(review): documented from a partial view of this function; confirm
  the details, including the end-of-input return value, against the full
  body.
*/
int READ_INFO::read_xml()
1573
DBUG_ENTER("READ_INFO::read_xml");
1574
int chr, chr2, chr3;
1576
String tag, attribute, value;
1580
attribute.length(0);
1583
// Main loop: runs until end of input
for (chr= my_tospace(GET); chr != my_b_EOF ; )
1586
case '<': /* read tag */
1587
/* TODO: check if this is a comment <!-- comment --> */
// NOTE(review): the chr2/chr3 dash checks below look like comment handling, so the TODO above may be stale - confirm
1588
chr= my_tospace(GET);
1594
if(chr2 == '-' && chr3 == '-')
1598
chr= my_tospace(GET);
1600
// Loops until a > is seen while chr2 and chr3 both hold a dash
while(chr != '>' || chr2 != '-' || chr3 != '-')
1607
else if (chr2 == '-')
1612
chr= my_tospace(GET);
1613
if (chr == my_b_EOF)
1621
// The tag name ends at >, a blank, a slash or end of input
while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF)
1623
if(chr != delim) /* fix for the '<field name =' format */
1625
chr= my_tospace(GET);
1628
// row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term
// line_term presumably includes the angle brackets, hence the length - 2 and the + 1 offset
1629
if((tag.length() == line_term_length -2) &&
1630
(strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
1632
DBUG_PRINT("read_xml", ("start-of-row: %i %s %s",
1633
level,tag.c_ptr_safe(), line_term_ptr));
1636
if(chr == ' ' || chr == '>')
1639
// Clears tags stored at deeper levels (see clear_level)
clear_level(level + 1);
1648
case ' ': /* read attribute */
1649
while(chr == ' ') /* skip blanks */
1650
chr= my_tospace(GET);
1655
// The attribute name ends at =, a slash, > or end of input
while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF)
1657
attribute.append(chr);
1658
chr= my_tospace(GET);
1662
case '>': /* end tag - read tag value */
1664
// The tag value runs up to the next opening angle bracket
chr= read_value('<', &value);
1668
/* save value to list */
// Only non-empty tag names with non-empty values are stored
1669
if(tag.length() > 0 && value.length() > 0)
1671
DBUG_PRINT("read_xml", ("lev:%i tag:%s val:%s",
1672
level,tag.c_ptr_safe(), value.c_ptr_safe()));
1673
taglist.push_front( new XML_TAG(level, tag, value));
1677
attribute.length(0);
1680
case '/': /* close tag */
1682
chr= my_tospace(GET);
1683
if(chr != '>') /* if this is an empty tag <tag /> */
1684
tag.length(0); /* we should keep tag value */
1685
while(chr != '>' && chr != my_b_EOF)
1688
chr= my_tospace(GET);
1691
// Same row-tag comparison as in the opening-tag case above
if((tag.length() == line_term_length -2) &&
1692
(strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
1694
DBUG_PRINT("read_xml", ("found end-of-row %i %s",
1695
level, tag.c_ptr_safe()));
1696
DBUG_RETURN(0); //normal return
1698
chr= my_tospace(GET);
1701
case '=': /* attribute name end - read the value */
1702
//check for tag field and attribute name
// NOTE(review): memcmp with STRING_WITH_LEN presumably compares only the literal length, i.e. a prefix match with no length check on tag or attribute - confirm
1703
if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
1704
!memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name")))
1707
this is format <field name="xx">xx</field>
1708
where actual fieldname is in attribute
1710
delim= my_tospace(GET);
1712
attribute.length(0);
1713
chr= '<'; /* we pretend that it is a tag */
1720
if (chr == my_b_EOF)
1722
if(chr == '"' || chr == '\'')
1728
delim= ' '; /* no delimiter, use space */
1732
// The attribute value runs up to the chosen delimiter
chr= read_value(delim, &value);
1733
if(attribute.length() > 0 && value.length() > 0)
1735
DBUG_PRINT("read_xml", ("lev:%i att:%s val:%s\n",
1737
attribute.c_ptr_safe(),
1738
value.c_ptr_safe()));
1739
// Attributes are stored one level below the tag they belong to
taglist.push_front(new XML_TAG(level + 1, attribute, value));
1741
attribute.length(0);
1744
chr= my_tospace(GET);
1748
chr= my_tospace(GET);
1753
DBUG_PRINT("read_xml",("Found eof"));