1
package drizzled.message;
2
option optimize_for = SPEED;
10
* This file contains definitions for protobuffer messages that
11
* are involved in Drizzle's replication system.
15
* We do not discuss implementation specifics about how messages
16
* are transmitted across the wire in this documentation because a
17
* discussion about those implementation details is immaterial to the
18
* definition of the messages used in replication.
20
* For a discussion on the implementation of how messages are transmitted
21
* to replicas, see the plugins which implement the replication system.
25
* Messages defined in this file are related in the following ways:
27
* ------------------------------------------------------------------
29
* | Transaction message |
31
* | ----------------------------------------------------------- |
33
* | | TransactionContext message | |
35
* | ----------------------------------------------------------- |
36
* | ----------------------------------------------------------- |
38
* | | Statement message 1 | |
40
* | ----------------------------------------------------------- |
41
* | ----------------------------------------------------------- |
43
* | | Statement message 2 | |
45
* | ----------------------------------------------------------- |
47
* | ----------------------------------------------------------- |
49
* | | Statement message N | |
51
* | ----------------------------------------------------------- |
52
* ------------------------------------------------------------------
54
* with each Statement message looking like so:
56
* ------------------------------------------------------------------
58
* | Statement message |
60
* | ----------------------------------------------------------- |
62
* | | Common information | |
64
* | | - Type of Statement (INSERT, DELETE, etc) | |
65
* | | - Start Timestamp | |
66
* | | - End Timestamp | |
67
* | | - (OPTIONAL) Actual SQL query string | |
69
* | ----------------------------------------------------------- |
70
* | ----------------------------------------------------------- |
72
* | | Statement subclass message 1 (see below) | |
74
* | ----------------------------------------------------------- |
76
* | ----------------------------------------------------------- |
78
* | | Statement subclass message N (see below) | |
80
* | ----------------------------------------------------------- |
81
* ------------------------------------------------------------------
83
* The Transaction Message
84
* =======================
86
* The main "envelope" message which represents an atomic transaction
87
* which changed the state of a server is the Transaction message class.
89
* The Transaction message contains two pieces:
91
* 1) A TransactionContext message containing information about the
92
* transaction as a whole, such as the ID of the executing server,
93
* the start and end timestamp of the transaction, and a globally-
94
* unique identifier for the transaction.
96
* 2) A vector of Statement messages representing the distinct SQL
97
* statements which modified the state of the server. The Statement
98
* message is, itself, a generic envelope message containing a
99
* sub-message which describes the specific data modification which
100
* occurred on the server (such as, for instance, an INSERT statement).
102
* The Statement Message
103
* =====================
105
* The generic "envelope" message containing information common to each
106
* SQL statement executed against a server (such as a start and end timestamp
107
* and the type of the SQL statement) as well as a Statement subclass message
108
* describing the specific data modification event on the server.
110
* Each Statement message contains a type member which indicates how readers
111
* of the Statement should construct the inner Statement subclass representing
114
* For example, suppose we have read a Transaction message and have executed
115
* the following code:
119
* message::Statement &statement= transaction.statement(0); // Get first statement
121
* switch (statement.type())
123
* case Statement::INSERT:
125
* // Assume this is where we land...so, we know that the Statement
126
* // message represents an INSERT SQL statement. We must now get the
127
* // specific information describing the INSERT statement.
129
* // We assume here a simple INSERT statement that does not represent
130
* // a bulk operation.
131
* message::InsertHeader &insert_header= statement.insert_header();
132
* message::TableMetadata &table_metadata= insert_header.table_metadata();
134
* // Grab table name and echo out as start of INSERT SQL string...
135
* cout << "INSERT INTO `" << table_metadata.schema_name();
136
* cout << "`.`" << table_metadata.table_name() << "` (";
138
* // Add field list to SQL string...
139
* uint32_t num_fields= insert_header.field_metadata_size();
142
* for (x= 0; x < num_fields; ++x)
144
* message::FieldMetadata &field_metadata= insert_header.field_metadata(x);
147
* cout << "`" << field_metadata.name() << "`";
150
* cout << ") VALUES (";
152
* // Add insert values
153
* message::InsertData &insert_data= statement.insert_data();
154
* uint32_t num_records= insert_data.record_size();
156
* for (x= 0; x < num_records; ++x)
160
* for (y= 0; y < num_fields; ++y)
164
* cout << "'" << insert_data.record(x).insert_value(y) << "'";
174
* How Bulk Operations Work
175
* ========================
177
* Certain operations which change large volumes of data on a server
178
* present a specific set of problems for a transaction coordinator or
179
* replication service. If all operations must complete atomically on a
180
* publishing server before replicas are delivered the complete
181
* transactional unit:
183
* 1) The publishing server could consume a large amount of memory
184
* building an in-memory Transaction message containing all the
185
* operations contained in the entire transaction.
187
* 2) A replica, or subscribing server, is wasting time waiting on the
188
* eventual completion (commit) of the large transaction on the
189
* publishing server. It could be applying pieces of the large
190
* transaction in the meantime...
192
* In order to prevent the problems inherent in 1) and 2) above, Drizzle's
193
* replication system uses a mechanism which provides bulk change
196
* When a regular SQL statement modifies or inserts more rows than a
197
* certain threshold, Drizzle's replication services component will begin
198
* sending Transaction messages to replicas which contain a chunk
199
* (or "segment") of the data which has been changed on the publisher.
201
* When data is inserted, updated, or deleted in the database, a
202
* header containing information about modified tables and fields is
203
* matched with one or more data segments which contain the actual
204
* values changed in the statement.
206
* It's easiest to understand this mechanism by following through a real-world
209
* Suppose the following table:
211
* CREATE TABLE test.person
213
* id INT NOT NULL AUTO_INCREMENT PRIMARY KEY
214
* , first_name VARCHAR(50)
215
* , last_name VARCHAR(50)
216
* , is_active CHAR(1) NOT NULL DEFAULT 'Y'
219
* Also suppose that test.person contains <strong>1 million records</strong>.
221
* Next, suppose a client issues the SQL statement:
223
* UPDATE test.person SET is_active = 'N';
225
* It is clear that one million records could be updated by this statement
226
* (we say, "could be" since Drizzle does not actually update a record if
227
* the UPDATE would not change the existing record...).
229
* In order to prevent the publishing server from having to construct an
230
* enormous Transaction message, Drizzle's replication services component
231
* will do the following:
233
* 1) Construct a Transaction message with a transaction context containing
234
* information about the originating server, the transaction ID, and
235
* timestamp information.
237
* 2) Construct an UpdateHeader message with information about the tables
238
* and fields involved in the UPDATE statement. Push this UpdateHeader
239
* message onto the Transaction message's statement vector.
241
* 3) Construct an UpdateData message. Set the segment_id member to 1.
242
* Set the end_segment member to true.
244
* 4) For every record updated in a storage engine, the ReplicationServices
245
* component builds a new UpdateRecord message and appends this message
246
* to the aforementioned UpdateData message's record vector.
248
* 5) After a certain threshold of records is reached, the
249
* ReplicationServices component sets the current UpdateData message's
250
* end_segment member to false, and proceeds to send the Transaction
251
* message to replicators.
253
* 6) The ReplicationServices component then constructs a new Transaction
254
* message and constructs a transaction context with the same
255
* transaction ID and server information.
257
* 7) A new UpdateData message is created. The message's segment_id is
258
* set to N+1 and as new records are updated, new UpdateRecord messages
259
* are appended to the UpdateData message's record vector.
261
* 8) While records are being updated, we repeat steps 5 through 7, with
262
* only the final UpdateData message having its end_segment member set
268
* Any time a transaction is rolled back, a single Transaction message
269
* containing a Statement with type = ROLLBACK is sent to replicators.
271
* What the replicator does with this information depends on the
272
* replicator. For most, only rollbacks of bulk operations will actually
273
* trigger any real action on a replica, since non-bulk operations won't
274
* have changed any data on a replica and the ROLLBACK is only sent to a
275
* replica to notify it of a transaction being rolled back (so that the
276
* replica can understand transaction ID sequence gaps...)
280
* Some minimal information transferred in the header of Statement
281
* submessage classes which identifies metadata about a specific
282
* field involved in a Statement.
284
message FieldMetadata
286
required Table.Field.FieldType type = 1; /* Data type of the field, expressed as a Table.Field.FieldType value */
287
required string name = 2; /* Name of the field */
291
* Minimal information transferred in the header of Statement submessage
292
* classes which identifies metadata about the schema objects being
293
* modified in a Statement.
295
message TableMetadata
297
required string schema_name = 1; /* Name of the schema that contains the table */
298
required string table_name = 2; /* Name of the table within that schema */
302
Context for a transaction.
304
message TransactionContext
306
required uint32 server_id = 1; /* Unique identifier of the server that executed the transaction */
307
required uint64 transaction_id = 2; /* Globally-unique transaction ID; reused by every Transaction message that carries a segment of the same bulk operation */
308
required uint64 start_timestamp = 3; /* Timestamp of when the transaction started (NOTE(review): unit not stated here; Statement timestamps are nanosecond precision -- confirm these match) */
309
required uint64 end_timestamp = 4; /* Timestamp of when the transaction ended (NOTE(review): same unit as start_timestamp -- confirm) */
313
* Represents a single record being inserted into a single table.
317
* An INSERT Statement contains one or more InsertRecord submessages, each
318
* of which represents a single record being inserted into a table.
322
repeated bytes insert_value = 1;
326
* Represents statements which insert data into the database:
331
* REPLACE (is a delete and an insert)
333
* The statement is composed of a header (InsertHeader) containing
334
* metadata about affected tables and fields, as well as one or more data
335
* segments (InsertData) containing the actual records
340
* Bulk insert operations will have >1 data segment, with the last data
341
* segment having its end_segment member set to true.
345
required TableMetadata table_metadata = 1; /* Minimal metadata about the table affected */
346
repeated FieldMetadata field_metadata = 2; /* Collection of metadata about fields affected */
351
required uint32 segment_id = 1; /* The segment number */
352
required bool end_segment = 2; /* Is this the final segment? */
353
repeated InsertRecord record = 3; /* The records inserted */
357
* Represents a single record being updated in a single table.
361
* An UPDATE Statement contains one or more UpdateRecord submessages, each
362
* of which represents a single record being updated in a table.
366
repeated bytes key_value = 1; /* The value of keys of updated records (unique or primary key) */
367
repeated bytes before_value = 2; /* The value of the record before the update (optional) */
371
* Represents statements which update data in the database:
373
* INSERT ... ON DUPLICATE KEY UPDATE
376
* The statement is composed of a header (UpdateHeader) containing
377
* metadata about affected tables and fields, as well as one or more data
378
* segments (UpdateData) containing the actual records
383
* Bulk update operations will have >1 data segment, with the last data
384
* segment having its end_segment member set to true.
388
required TableMetadata table_metadata = 1; /* Minimal metadata about the table affected */
389
repeated FieldMetadata key_field_metadata = 2; /* Collection of metadata about key fields */
390
repeated FieldMetadata set_field_metadata = 3; /* Collection of metadata about fields affected */
391
repeated bytes set_value = 4; /* The value of the field after the update */
396
required uint32 segment_id = 1; /* The segment number */
397
required bool end_segment = 2; /* Is this the final segment? */
398
repeated UpdateRecord record = 3; /* Collection of same size as above metadata containing the
399
values of all records of all the fields being updated. */
403
* Represents a single record being deleted in a single table.
407
* A DELETE Statement contains one or more DeleteRecord submessages, each
408
* of which represents a single record being deleted from a table.
412
repeated bytes key_value = 1;
416
* Represents statements which delete data from the database:
419
* REPLACE (is a delete and an insert)
421
* The statement is composed of a header (DeleteHeader) containing
422
* metadata about affected tables and fields, as well as one or more data
423
* segments (DeleteData) containing the actual records
428
* Bulk delete operations will have >1 data segment, with the last data
429
* segment having its end_segment member set to true.
433
required TableMetadata table_metadata = 1; /* Minimal metadata about the table affected */
434
repeated FieldMetadata key_field_metadata = 2; /* Collection of metadata about key fields */
439
required uint32 segment_id = 1; /* The segment number */
440
required bool end_segment = 2; /* Is this the final segment? */
441
repeated DeleteRecord record = 3; /* Collection of same size as above metadata containing the
442
values of all records of all the fields being deleted. */
446
* Represents a TRUNCATE TABLE statement
448
message TruncateTableStatement
450
required TableMetadata table_metadata = 1; /* Minimal metadata (schema name and table name) identifying the table to truncate */
454
* Represents a CREATE SCHEMA statement
456
message CreateSchemaStatement
458
required Schema schema = 1; /* Definition of the schema being created (the Schema message is not defined in the visible part of this file) */
462
* Represents an ALTER SCHEMA statement
464
message AlterSchemaStatement
466
required Schema before = 1; /* Schema definition as it was before the ALTER SCHEMA */
467
required Schema after = 2; /* Schema definition as it is after the ALTER SCHEMA */
471
* Represents a DROP SCHEMA statement
473
message DropSchemaStatement
475
required string schema_name = 1; /* Name of the schema to drop; only the name is carried, not a full Schema definition */
479
* Represents a CREATE TABLE statement.
481
message CreateTableStatement
483
required Table table = 1; /* Full definition of the table being created */
487
* Represents an ALTER TABLE statement.
489
message AlterTableStatement
491
required Table before = 1; /* Full table definition as it was before the ALTER TABLE */
492
required Table after = 2; /* Full table definition as it is after the ALTER TABLE */
496
* Represents a DROP TABLE statement
498
message DropTableStatement
500
required TableMetadata table_metadata = 1; /* Minimal metadata (schema name and table name) identifying the table to drop */
504
* Represents a SET statement
508
* This is constructed only for changes in a global variable. For changes
509
* to a session-local variable, the variable's contents are not transmitted
510
* as Statement messages.
512
message SetVariableStatement
514
required FieldMetadata variable_metadata = 1; /* Type and name of the variable to set, carried in a FieldMetadata message */
515
required bytes variable_value = 2; /* Value to assign to the variable, as raw bytes */
519
* Base message class for a Statement. A Transaction is composed of
520
* one or more Statement messages. The transaction message represents
521
* a single atomic unit of change in the server's state. Depending on
522
* whether the server/connection is in AUTOCOMMIT mode or not, a
523
* Transaction message may contain one or more Statement messages.
529
ROLLBACK = 0; /* A ROLLBACK indicator */
530
INSERT = 1; /* An INSERT statement */
531
DELETE = 2; /* A DELETE statement */
532
UPDATE = 3; /* An UPDATE statement */
533
TRUNCATE_TABLE = 4; /* A TRUNCATE TABLE statement */
534
CREATE_SCHEMA = 5; /* A CREATE SCHEMA statement */
535
ALTER_SCHEMA = 6; /* An ALTER SCHEMA statement */
536
DROP_SCHEMA = 7; /* A DROP SCHEMA statement */
537
CREATE_TABLE = 8; /* A CREATE TABLE statement */
538
ALTER_TABLE = 9; /* An ALTER TABLE statement */
539
DROP_TABLE = 10; /* A DROP TABLE statement */
540
SET_VARIABLE = 98; /* A SET statement */
541
RAW_SQL = 99; /* A raw SQL statement */
543
required Type type = 1; /* The type of the Statement */
544
required uint64 start_timestamp = 2; /* Nanosecond precision timestamp of when the
545
Statement was started on the server */
546
required uint64 end_timestamp = 3; /* Nanosecond precision timestamp of when the
547
Statement finished executing on the server */
548
optional string sql = 4; /* May contain the original SQL string */
551
* Each Statement message may contain one or more of
552
* the below sub-messages, depending on the Statement's type.
554
optional InsertHeader insert_header = 5;
555
optional InsertData insert_data = 6;
556
optional UpdateHeader update_header = 7;
557
optional UpdateData update_data = 8;
558
optional DeleteHeader delete_header = 9;
559
optional DeleteData delete_data = 10;
560
optional TruncateTableStatement truncate_table_statement = 11;
561
optional CreateSchemaStatement create_schema_statement = 12;
562
optional DropSchemaStatement drop_schema_statement = 13;
563
optional AlterSchemaStatement alter_schema_statement = 14;
564
optional CreateTableStatement create_table_statement = 15;
565
optional AlterTableStatement alter_table_statement = 16;
566
optional DropTableStatement drop_table_statement = 17;
567
optional SetVariableStatement set_variable_statement = 18;
571
* Represents a collection of Statement messages that
572
* has been executed atomically on a server.
576
* For bulk operations, the transaction may contain only
577
* a data segment, and the atomic guarantee can only be enforced by XA.
581
required TransactionContext transaction_context = 1;
582
repeated Statement statement = 2;