17
17
/* Copy data from a textfile to table */
18
/* 2006-12 Erik Wetterberg : LOAD XML added */
19
#include <drizzled/server_includes.h>
20
#include "mysql_priv.h"
20
23
#include "sql_repl.h"
21
#include <drizzled/drizzled_error_messages.h>
30
XML_TAG(int l, String f, String v);
34
XML_TAG::XML_TAG(int l, String f, String v)
26
unsigned char *buffer, /* Buffer for read text */
44
uchar *buffer, /* Buffer for read text */
27
45
*end_of_buff; /* Data in bufferts ends here */
28
46
uint buff_length, /* Length of buffert */
29
47
max_length; /* Max length of row */
35
53
bool need_end_io_cache;
56
int level; /* for load xml */
40
59
bool error,line_cuted,found_null,enclosed;
41
unsigned char *row_start, /* Found row starts here */
60
uchar *row_start, /* Found row starts here */
42
61
*row_end; /* Found row ends here */
43
const CHARSET_INFO *read_charset;
62
CHARSET_INFO *read_charset;
45
READ_INFO(File file,uint32_t tot_length, const CHARSET_INFO * const cs,
64
READ_INFO(File file,uint tot_length,CHARSET_INFO *cs,
46
65
String &field_term,String &line_start,String &line_term,
47
66
String &enclosed,int escape,bool get_it_from_net, bool is_fifo);
71
95
/* Store the caller-supplied opaque pointer in cache.arg. */
void set_io_cache_arg(void* arg)
{
  cache.arg= arg;
}
74
static int read_fixed_length(THD *thd, COPY_INFO &info, TableList *table_list,
98
static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
75
99
List<Item> &fields_vars, List<Item> &set_fields,
76
100
List<Item> &set_values, READ_INFO &read_info,
78
102
bool ignore_check_option_errors);
79
static int read_sep_field(THD *thd, COPY_INFO &info, TableList *table_list,
103
static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
80
104
List<Item> &fields_vars, List<Item> &set_fields,
81
105
List<Item> &set_values, READ_INFO &read_info,
82
String &enclosed, uint32_t skip_lines,
106
String &enclosed, ulong skip_lines,
83
107
bool ignore_check_option_errors);
109
static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
110
List<Item> &fields_vars, List<Item> &set_fields,
111
List<Item> &set_values, READ_INFO &read_info,
112
String &enclosed, ulong skip_lines,
113
bool ignore_check_option_errors);
85
115
static bool write_execute_load_query_log_event(THD *thd,
86
116
bool duplicates, bool ignore,
87
117
bool transactional_table,
349
379
table->file->ha_start_bulk_insert((ha_rows) 0);
350
380
table->copy_blobs=1;
352
thd->abort_on_warning= true;
382
thd->abort_on_warning= (!ignore &&
383
(thd->variables.sql_mode &
384
(MODE_STRICT_TRANS_TABLES |
385
MODE_STRICT_ALL_TABLES)));
354
if (!field_term->length() && !enclosed->length())
387
if (ex->filetype == FILETYPE_XML) /* load xml */
388
error= read_xml_field(thd, info, table_list, fields_vars,
389
set_fields, set_values, read_info,
390
*(ex->line_term), skip_lines, ignore);
391
else if (!field_term->length() && !enclosed->length())
355
392
error= read_fixed_length(thd, info, table_list, fields_vars,
356
393
set_fields, set_values, read_info,
357
394
skip_lines, ignore);
498
535
****************************************************************************/
501
read_fixed_length(THD *thd, COPY_INFO &info, TableList *table_list,
538
read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
502
539
List<Item> &fields_vars, List<Item> &set_fields,
503
540
List<Item> &set_values, READ_INFO &read_info,
504
uint32_t skip_lines, bool ignore_check_option_errors)
541
ulong skip_lines, bool ignore_check_option_errors)
506
543
List_iterator_fast<Item> it(fields_vars);
507
544
Item_field *sql_field;
508
Table *table= table_list->table;
545
TABLE *table= table_list->table;
617
read_sep_field(THD *thd, COPY_INFO &info, TableList *table_list,
654
read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
618
655
List<Item> &fields_vars, List<Item> &set_fields,
619
656
List<Item> &set_values, READ_INFO &read_info,
620
String &enclosed, uint32_t skip_lines,
657
String &enclosed, ulong skip_lines,
621
658
bool ignore_check_option_errors)
623
660
List_iterator_fast<Item> it(fields_vars);
625
Table *table= table_list->table;
626
uint32_t enclosed_length;
662
TABLE *table= table_list->table;
663
uint enclosed_length;
835
/****************************************************************************
836
** Read rows in xml format
837
****************************************************************************/
839
read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
840
List<Item> &fields_vars, List<Item> &set_fields,
841
List<Item> &set_values, READ_INFO &read_info,
842
String &row_tag __attribute__((__unused__)),
844
bool ignore_check_option_errors)
846
List_iterator_fast<Item> it(fields_vars);
848
TABLE *table= table_list->table;
849
bool no_trans_update_stmt;
850
CHARSET_INFO *cs= read_info.read_charset;
852
no_trans_update_stmt= !table->file->has_transactions();
854
for ( ; ; it.rewind())
858
thd->send_kill_message();
862
// read row tag and save values into tag list
863
if (read_info.read_xml())
866
List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
871
restore_record(table, s->default_values);
875
/* If this line is to be skipped we don't want to fill field or var */
879
/* find field in tag list */
883
while(tag && strcmp(tag->field.c_ptr(), item->name) != 0)
886
if (!tag) // found null
888
if (item->type() == Item::FIELD_ITEM)
890
Field *field= ((Item_field *) item)->field;
893
if (field == table->next_number_field)
894
table->auto_increment_field_not_null= true;
895
if (!field->maybe_null())
897
if (field->type() == FIELD_TYPE_TIMESTAMP)
898
((Field_timestamp *) field)->set_time();
899
else if (field != table->next_number_field)
900
field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
901
ER_WARN_NULL_TO_NOTNULL, 1);
905
((Item_user_var_as_out_param *) item)->set_null_value(cs);
909
if (item->type() == Item::FIELD_ITEM)
912
Field *field= ((Item_field *)item)->field;
913
field->set_notnull();
914
if (field == table->next_number_field)
915
table->auto_increment_field_not_null= true;
916
field->store((char *) tag->value.ptr(), tag->value.length(), cs);
919
((Item_user_var_as_out_param *) item)->set_value(
920
(char *) tag->value.ptr(),
921
tag->value.length(), cs);
935
/* Have not read any field, thus input file is simply ended */
936
if (item == fields_vars.head())
939
for ( ; item; item= it++)
941
if (item->type() == Item::FIELD_ITEM)
944
QQ: We probably should not throw warning for each field.
945
But how about intention to always have the same number
946
of warnings in THD::cuted_fields (and get rid of cuted_fields
950
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
951
ER_WARN_TOO_FEW_RECORDS,
952
ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count);
955
((Item_user_var_as_out_param *)item)->set_null_value(cs);
959
if (thd->killed || fill_record(thd, set_fields, set_values,
960
ignore_check_option_errors))
963
if (write_record(thd, table, &info))
967
We don't need to reset auto-increment field since we are restoring
968
its default value at the beginning of each loop iteration.
970
thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt;
973
return(test(read_info.error));
798
977
/* Unescape all escape characters, mark \N as null */
852
1032
line_term_ptr=(char*) "";
854
1034
enclosed_char= (enclosed_length=enclosed_par.length()) ?
855
(unsigned char) enclosed_par[0] : INT_MAX;
856
field_term_char= field_term_length ? (unsigned char) field_term_ptr[0] : INT_MAX;
857
line_term_char= line_term_length ? (unsigned char) line_term_ptr[0] : INT_MAX;
1035
(uchar) enclosed_par[0] : INT_MAX;
1036
field_term_char= field_term_length ? (uchar) field_term_ptr[0] : INT_MAX;
1037
line_term_char= line_term_length ? (uchar) line_term_ptr[0] : INT_MAX;
858
1038
error=eof=found_end_of_line=found_null=line_cuted=0;
859
1039
buff_length=tot_length;
862
1042
/* Set up a stack for unget in case of long terminators */
863
uint32_t length=cmax(field_term_length,line_term_length)+1;
1043
uint length=max(field_term_length,line_term_length)+1;
864
1044
set_if_bigger(length,line_start.length());
865
1045
stack=stack_pos=(int*) sql_alloc(sizeof(int)*length);
867
if (!(buffer=(unsigned char*) my_malloc(buff_length+1,MYF(0))))
1047
if (!(buffer=(uchar*) my_malloc(buff_length+1,MYF(0))))
868
1048
error=1; /* purecov: inspected */
1427
Clear taglist of tags whose level is at or above the specified level
1429
int READ_INFO::clear_level(int level)
1431
List_iterator<XML_TAG> xmlit(taglist);
1435
while ((tag= xmlit++))
1437
if(tag->level >= level)
1448
Convert an XML entity to Unicode value.
1452
my_xml_entity_to_char(const char *name, uint length)
1456
if (!memcmp(name, "gt", length))
1458
if (!memcmp(name, "lt", length))
1461
else if (length == 3)
1463
if (!memcmp(name, "amp", length))
1466
else if (length == 4)
1468
if (!memcmp(name, "quot", length))
1470
if (!memcmp(name, "apos", length))
1478
@brief Convert tab, carriage return and line feed to space
1480
@param chr character
1482
@details According to the "XML 1.0" standard,
1483
only space (#x20) characters, carriage returns,
1484
line feeds or tabs are considered as spaces.
1485
Convert all of them to space (#x20) for parsing simplicity.
1490
return (chr == '\t' || chr == '\r' || chr == '\n') ? ' ' : chr;
1495
Read an xml value: handle multibyte and xml escape
1497
int READ_INFO::read_value(int delim, String *val)
1502
for (chr= my_tospace(GET); chr != delim && chr != my_b_EOF; )
1505
if (my_mbcharlen(read_charset, chr) > 1)
1507
int i, ml= my_mbcharlen(read_charset, chr);
1508
for (i= 1; i < ml; i++)
1512
Don't use my_tospace() in the middle of a multi-byte character
1513
TODO: check that the multi-byte sequence is valid.
1516
if (chr == my_b_EOF)
1524
for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET))
1526
if (chr == my_b_EOF)
1530
if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
1541
chr= my_tospace(GET);
1548
Read a record in xml format
1549
tags and attributes are stored in taglist
1550
when tag set in ROWS IDENTIFIED BY is closed, we are ready and return
1552
int READ_INFO::read_xml()
1554
int chr, chr2, chr3;
1556
String tag, attribute, value;
1560
attribute.length(0);
1563
for (chr= my_tospace(GET); chr != my_b_EOF ; )
1566
case '<': /* read tag */
1567
/* TODO: check if this is a comment <!-- comment --> */
1568
chr= my_tospace(GET);
1574
if(chr2 == '-' && chr3 == '-')
1578
chr= my_tospace(GET);
1580
while(chr != '>' || chr2 != '-' || chr3 != '-')
1587
else if (chr2 == '-')
1592
chr= my_tospace(GET);
1593
if (chr == my_b_EOF)
1601
while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF)
1603
if(chr != delim) /* fix for the '<field name =' format */
1605
chr= my_tospace(GET);
1608
if(chr == ' ' || chr == '>')
1611
clear_level(level + 1);
1620
case ' ': /* read attribute */
1621
while(chr == ' ') /* skip blanks */
1622
chr= my_tospace(GET);
1627
while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF)
1629
attribute.append(chr);
1630
chr= my_tospace(GET);
1634
case '>': /* end tag - read tag value */
1636
chr= read_value('<', &value);
1640
/* save value to list */
1641
if(tag.length() > 0 && value.length() > 0)
1642
taglist.push_front( new XML_TAG(level, tag, value));
1646
attribute.length(0);
1649
case '/': /* close tag */
1651
chr= my_tospace(GET);
1652
if(chr != '>') /* if this is an empty tag <tag /> */
1653
tag.length(0); /* we should keep tag value */
1654
while(chr != '>' && chr != my_b_EOF)
1657
chr= my_tospace(GET);
1660
if((tag.length() == line_term_length -2) &&
1661
(strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
1662
return(0); //normal return
1664
chr= my_tospace(GET);
1667
case '=': /* attribute name end - read the value */
1668
//check for tag field and attribute name
1669
if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
1670
!memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name")))
1673
this is format <field name="xx">xx</field>
1674
where actual fieldname is in attribute
1676
delim= my_tospace(GET);
1678
attribute.length(0);
1679
chr= '<'; /* we pretend that it is a tag */
1686
if (chr == my_b_EOF)
1688
if(chr == '"' || chr == '\'')
1694
delim= ' '; /* no delimiter, use space */
1698
chr= read_value(delim, &value);
1699
if(attribute.length() > 0 && value.length() > 0)
1700
taglist.push_front(new XML_TAG(level + 1, attribute, value));
1702
attribute.length(0);
1705
chr= my_tospace(GET);
1709
chr= my_tospace(GET);