Skip to content

Commit e11a540

Browse files
author
Neha Kumari
committed
WL#12168: Add Partition Information into the Binary Log
Splitting a large table into small pieces may lead to better performance if only part of the data is accessed, because less information is retrieved or updated. As part of this WL, replication will inject information on the table partition (partition_id) into the binary log and make it visible through mysqlbinlog. In particular, an external tool may need to take InnoDB partitions into account to support change propagation on partitioned tables.
1 parent 006c21f commit e11a540

28 files changed

+2338
-204
lines changed

libbinlogevents/include/rows_event.h

+117-11
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,21 @@
4545
*/
4646
#define EXTRA_ROW_INFO_LEN_OFFSET 0
4747
#define EXTRA_ROW_INFO_FORMAT_OFFSET 1
48-
#define EXTRA_ROW_INFO_HDR_BYTES 2
49-
#define EXTRA_ROW_INFO_MAX_PAYLOAD (255 - EXTRA_ROW_INFO_HDR_BYTES)
48+
#define EXTRA_ROW_INFO_HEADER_LENGTH 2
49+
#define EXTRA_ROW_INFO_MAX_PAYLOAD (255 - EXTRA_ROW_INFO_HEADER_LENGTH)
5050

5151
#define ROWS_MAPID_OFFSET 0
5252
#define ROWS_FLAGS_OFFSET 6
5353
#define ROWS_VHLEN_OFFSET 8
54-
#define ROWS_V_TAG_LEN 1
55-
#define ROWS_V_EXTRAINFO_TAG 0
54+
#define EXTRA_ROW_INFO_TYPECODE_LENGTH 1
55+
#define EXTRA_ROW_PART_INFO_VALUE_LENGTH 2
56+
57+
/**
58+
This is the typecode defined for the different elements present in
59+
the container Extra_row_info. It is different from the format information
60+
stored inside extra_row_ndb_info at EXTRA_ROW_INFO_FORMAT_OFFSET.
61+
*/
62+
enum class enum_extra_row_info_typecode { NDB = 0, PART = 1 };
5663

5764
namespace binary_log {
5865
/**
@@ -747,9 +754,47 @@ class Table_map_event : public Binary_log_event {
747754
</tr>
748755
749756
<tr>
750-
<td>extra_row_data</td>
751-
<td>unsigned char pointer</td>
752-
<td>Pointer to extra row data if any. If non null, first byte is length</td>
757+
<td>extra_row_info</td>
758+
<td>An object of class Extra_row_info</td>
759+
<td>The class Extra_row_info will be storing the information related
760+
to m_extra_row_ndb_info and partition info (partition_id and
761+
source_partition_id). At any given time a Rows_event can have both, one
762+
or none of ndb_info and partition_info present as part of Rows_event.
763+
In case both ndb_info and partition_info are present then below will
764+
be the order in which they will be stored.
765+
766+
@verbatim
767+
+----------+--------------------------------------+
768+
|type_code | extra_row_ndb_info |
769+
+----------+--------------------------------------+
770+
| NDB |Len of ndb_info |Format |ndb_data |
771+
| 1 byte |1 byte |1 byte |len - 2 byte |
772+
+----------+----------------+-------+-------------+
773+
774+
In case of INSERT/DELETE
775+
+-----------+----------------+
776+
| type_code | partition_info |
777+
+-----------+----------------+
778+
| PART | partition_id |
779+
| (1 byte) | 2 byte |
780+
+-----------+----------------+
781+
782+
In case of UPDATE
783+
+-----------+------------------------------------+
784+
| type_code | partition_info |
785+
+-----------+--------------+---------------------+
786+
| PART | partition_id | source_partition_id |
787+
| (1 byte) | 2 byte | 2 byte |
788+
+-----------+--------------+---------------------+
789+
790+
source_partition_id is used only in the case of Update_event
791+
to log the partition_id of the source partition.
792+
793+
@endverbatim
794+
This is the format for any information stored as extra_row_info.
795+
type_code is not a part of the class Extra_row_info as it is a constant
796+
value used at the time of serializing and decoding the event.
797+
</td>
753798
</tr>
754799
755800
<tr>
@@ -848,11 +893,9 @@ class Rows_event : public Binary_log_event {
848893
: Binary_log_event(type_arg),
849894
m_table_id(0),
850895
m_width(0),
851-
m_extra_row_data(0),
852896
columns_before_image(0),
853897
columns_after_image(0),
854898
row(0) {}
855-
856899
/**
857900
The constructor is responsible for decoding the event contained in
858901
the buffer.
@@ -890,13 +933,76 @@ class Rows_event : public Binary_log_event {
890933
uint32_t n_bits_len; /** value determined by (m_width + 7) / 8 */
891934
uint16_t var_header_len;
892935

893-
unsigned char *m_extra_row_data;
894-
895936
std::vector<uint8_t> columns_before_image;
896937
std::vector<uint8_t> columns_after_image;
897938
std::vector<uint8_t> row;
898939

899940
public:
941+
class Extra_row_info {
942+
private:
943+
/** partition_id for a row in a partitioned table */
944+
int m_partition_id;
945+
/**
946+
It is the partition_id of the source partition in case
947+
of Update_event, the target's partition_id is m_partition_id.
948+
This variable is used only in case of Update_event.
949+
*/
950+
int m_source_partition_id;
951+
/** The extra row info provided by NDB */
952+
unsigned char *m_extra_row_ndb_info;
953+
954+
public:
955+
Extra_row_info()
956+
: m_partition_id(UNDEFINED),
957+
m_source_partition_id(UNDEFINED),
958+
m_extra_row_ndb_info(nullptr) {}
959+
960+
Extra_row_info(const Extra_row_info &) = delete;
961+
962+
int get_partition_id() const { return m_partition_id; }
963+
void set_partition_id(int partition_id) {
964+
BAPI_ASSERT(partition_id < 65535);
965+
m_partition_id = partition_id;
966+
}
967+
968+
int get_source_partition_id() const { return m_source_partition_id; }
969+
void set_source_partition_id(int source_partition_id) {
970+
BAPI_ASSERT(source_partition_id < 65535);
971+
m_source_partition_id = source_partition_id;
972+
}
973+
974+
unsigned char *get_ndb_info() const { return m_extra_row_ndb_info; }
975+
void set_ndb_info(const unsigned char *ndb_info, size_t len) {
976+
BAPI_ASSERT(!have_ndb_info());
977+
m_extra_row_ndb_info =
978+
static_cast<unsigned char *>(bapi_malloc(len, 16 /* flags */));
979+
std::copy(ndb_info, ndb_info + len, m_extra_row_ndb_info);
980+
}
981+
/**
982+
Compares the extra_row_info in a Row event, it checks three things
983+
1. The m_extra_row_ndb_info pointers. It compares their significant bytes.
984+
2. Partition_id
985+
3. source_partition_id
986+
987+
@return
988+
true all the above variables are same in the event and the one passed
989+
in parameter.
990+
false Any of the above variable has a different value.
991+
*/
992+
bool compare_extra_row_info(const unsigned char *ndb_info_arg,
993+
int part_id_arg, int source_part_id);
994+
995+
bool have_part() const { return m_partition_id != UNDEFINED; }
996+
997+
bool have_ndb_info() const { return m_extra_row_ndb_info != nullptr; }
998+
size_t get_ndb_length();
999+
size_t get_part_length();
1000+
~Extra_row_info();
1001+
1002+
static const int UNDEFINED{INT_MAX};
1003+
};
1004+
Extra_row_info m_extra_row_info;
1005+
9001006
unsigned long long get_table_id() const { return m_table_id.id(); }
9011007

9021008
enum_flag get_flags() const { return static_cast<enum_flag>(m_flags); }

libbinlogevents/src/rows_event.cpp

+67-15
Original file line numberDiff line numberDiff line change
@@ -361,15 +361,14 @@ Rows_event::Rows_event(const char *buf, const Format_description_event *fde)
361361
: Binary_log_event(&buf, fde),
362362
m_table_id(0),
363363
m_width(0),
364-
m_extra_row_data(0),
365364
columns_before_image(0),
366365
columns_after_image(0),
367366
row(0) {
368367
BAPI_ENTER("Rows_event::Rows_event(const char*, ...)");
369368
READER_TRY_INITIALIZATION;
370369
READER_ASSERT_POSITION(fde->common_header_len);
371370
Log_event_type event_type = header()->type_code;
372-
uint16_t var_header_len = 0;
371+
size_t var_header_len = 0;
373372
size_t data_size = 0;
374373
uint8_t const post_header_len = fde->post_header_len[event_type - 1];
375374
m_type = event_type;
@@ -393,21 +392,40 @@ Rows_event::Rows_event(const char *buf, const Format_description_event *fde)
393392
/* Iterate over var-len header, extracting 'chunks' */
394393
uint64_t end = READER_CALL(position) + var_header_len;
395394
while (READER_CALL(position) < end) {
396-
uint8_t type;
397-
READER_TRY_SET(type, read<uint8_t>);
395+
int type_placeholder;
396+
READER_TRY_SET(type_placeholder, read<uint8_t>);
397+
398+
enum_extra_row_info_typecode type;
399+
type = (enum_extra_row_info_typecode)type_placeholder;
398400
switch (type) {
399-
case ROWS_V_EXTRAINFO_TAG: {
401+
case enum_extra_row_info_typecode::NDB: {
400402
/* Have an 'extra info' section, read it in */
401-
uint8_t infoLen = 0;
402-
READER_TRY_SET(infoLen, read<uint8_t>);
403-
/* infoLen is part of the buffer to be copied below */
403+
size_t ndb_infolen = 0;
404+
READER_TRY_SET(ndb_infolen, read<uint8_t>);
405+
/* ndb_infolen is part of the buffer to be copied below */
404406
READER_CALL(go_to, READER_CALL(position) - 1);
405407

406408
/* Just store/use the first tag of this type, skip others */
407-
if (!m_extra_row_data) {
408-
READER_TRY_CALL(alloc_and_memcpy, &m_extra_row_data, infoLen, 16);
409+
if (!m_extra_row_info.have_ndb_info()) {
410+
const char *ndb_info;
411+
READER_TRY_SET(ndb_info, ptr, ndb_infolen);
412+
m_extra_row_info.set_ndb_info(
413+
reinterpret_cast<const unsigned char *>(ndb_info), ndb_infolen);
414+
ndb_info = nullptr;
409415
} else {
410-
READER_TRY_CALL(forward, infoLen);
416+
READER_TRY_CALL(forward, ndb_infolen);
417+
}
418+
break;
419+
}
420+
case enum_extra_row_info_typecode::PART: {
421+
int part_id_placeholder = 0;
422+
READER_TRY_SET(part_id_placeholder, read<uint16_t>);
423+
m_extra_row_info.set_partition_id(part_id_placeholder);
424+
if (event_type == UPDATE_ROWS_EVENT ||
425+
event_type == UPDATE_ROWS_EVENT_V1 ||
426+
event_type == PARTIAL_UPDATE_ROWS_EVENT) {
427+
READER_TRY_SET(part_id_placeholder, read<uint16_t>);
428+
m_extra_row_info.set_source_partition_id(part_id_placeholder);
411429
}
412430
break;
413431
}
@@ -440,10 +458,44 @@ Rows_event::Rows_event(const char *buf, const Format_description_event *fde)
440458
BAPI_VOID_RETURN;
441459
}
442460

443-
Rows_event::~Rows_event() {
444-
if (m_extra_row_data) {
445-
bapi_free(m_extra_row_data);
446-
m_extra_row_data = NULL;
461+
Rows_event::~Rows_event() {}
462+
463+
bool Rows_event::Extra_row_info::compare_extra_row_info(
464+
const unsigned char *ndb_info_arg, int part_id_arg,
465+
int source_part_id_arg) {
466+
const unsigned char *ndb_row_info = m_extra_row_ndb_info;
467+
bool ndb_info = ((ndb_info_arg == ndb_row_info) ||
468+
((ndb_info_arg != NULL) && (ndb_row_info != NULL) &&
469+
(ndb_info_arg[EXTRA_ROW_INFO_LEN_OFFSET] ==
470+
ndb_row_info[EXTRA_ROW_INFO_LEN_OFFSET]) &&
471+
(memcmp(ndb_info_arg, ndb_row_info,
472+
ndb_row_info[EXTRA_ROW_INFO_LEN_OFFSET]) == 0)));
473+
474+
bool part_info = (part_id_arg == m_partition_id) &&
475+
(source_part_id_arg == m_source_partition_id);
476+
return part_info && ndb_info;
477+
}
478+
479+
size_t Rows_event::Extra_row_info::get_ndb_length() {
480+
if (have_ndb_info())
481+
return m_extra_row_ndb_info[EXTRA_ROW_INFO_LEN_OFFSET];
482+
else
483+
return 0;
484+
}
485+
486+
size_t Rows_event::Extra_row_info::get_part_length() {
487+
if (have_part()) {
488+
if (m_source_partition_id != UNDEFINED)
489+
return EXTRA_ROW_PART_INFO_VALUE_LENGTH * 2;
490+
return EXTRA_ROW_PART_INFO_VALUE_LENGTH;
491+
}
492+
return 0;
493+
}
494+
495+
/**
  Hands the NDB extra row info buffer, when one is present, to bapi_free()
  and resets the pointer.
*/
Rows_event::Extra_row_info::~Extra_row_info() {
  if (!have_ndb_info()) return;

  bapi_free(m_extra_row_ndb_info);
  m_extra_row_ndb_info = nullptr;
}
449501

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# ==== Purpose ====
2+
#
3+
# Assert that the binary log contains a specific sequence of
4+
# partition ids for INSERT and DELETE statements
5+
#
6+
# ==== Usage ====
7+
#
8+
# --let $binlog_fullpath= TEXT
9+
# --let $binlog_position= NUMBER
10+
# --let $expected_partition_id= NUMBER
11+
# --source include/rpl_partition_info.inc
12+
#
13+
# Parameters:
14+
#
15+
# $binlog_fullpath
16+
# The full path of binary log to look in.
17+
# (settable using include/save_binlog_position.inc)
18+
# $binlog_position
19+
# The log position to start reading from
20+
# (settable using include/save_binlog_position.inc)
21+
# $expected_partition_id
22+
# The partition_id where the row changes are done. It will contain
23+
# a single value or a semicolon separated list of partition_ids.
24+
# ==== References ====
25+
#
26+
# Bug#28712618:INSERT/UPDATE/DELETE WITH MULTIPLE ROWS IN SAME PARTITION CREATE MULTIPLE EVENT
27+
# WL#12168:Add Partition Information into the Binary Log
28+
29+
if ($expected_partition_id == '')
30+
{
31+
--die !!!ERROR IN TEST: you must set $expected_partition_id
32+
}
33+
34+
--let $include_filename= rpl_partition_info.inc
35+
--source include/begin_include_file.inc
36+
37+
--exec $MYSQL_BINLOG --start-position=$binlog_position -vv $binlog_fullpath > $MYSQLTEST_VARDIR/tmp/mysqlbinlog_partition.binlog
38+
--let $assert_file = $MYSQLTEST_VARDIR/tmp/mysqlbinlog_partition.binlog
39+
40+
--let $assert_select= partition:
41+
--let $assert_match= `SELECT CONCAT('partition: ', REPLACE('$expected_partition_id', ';', '.*\n[^\n]*partition: '))`
42+
--let $assert_text= assert_partition_id: $expected_partition_id
43+
--source include/assert_grep.inc
44+
45+
--let $expected_partition_id=
46+
--remove_file $MYSQLTEST_VARDIR/tmp/mysqlbinlog_partition.binlog
47+
--let $include_filename= rpl_partition_info.inc
48+
--source include/end_include_file.inc
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# ==== Purpose ====
2+
#
3+
# Assert that the binary log contains a specific sequence of partition ids
4+
# for UPDATE Events (having both a partition id for the
5+
# before-image (source_partition_id), and one for the after-image (target_partition_id)).
6+
#
7+
# ==== Usage ====
8+
#
9+
# --let $binlog_fullpath= TEXT
10+
# --let $binlog_position= NUMBER
11+
# --let $partition_id= SOURCE_ID TARGET_ID[;SOURCE_ID TARGET_ID...]
12+
# --source include/rpl_partition_info_update.inc
13+
#
14+
# Parameters:
15+
#
16+
# $binlog_fullpath
17+
# The full path of binary log to look in.
18+
# (settable using include/save_binlog_position.inc)
19+
# $binlog_position
20+
# The log position to start reading from
21+
# (settable using include/save_binlog_position.inc)
22+
# $partition_id
23+
# This is a semicolon separated list of pairs of partition_id.
24+
# Each pair consists of 'source_partition_id' as an integer
25+
# followed by space followed by 'target_partition_id' as an integer.
26+
# Note that this is whitespace-sensitive so there should
27+
# not be any other space characters than those separating
28+
# source_partition_id from target_partition_id.
29+
# ==== References ====
30+
#
31+
# Bug#28977199:UPDATE EVENT LOGS ONLY THE TARGET PARTITION WHEN MODIFYING MULTIPLE PARTITION
32+
# WL#12168:Add Partition Information into the Binary Log
33+
34+
35+
# Refuse to run without the mandatory $partition_id parameter; the message
# names the variable that is actually tested so the caller knows what to set.
if ($partition_id == '')
{
  --die !!!ERROR IN TEST: you must set $partition_id
}
39+
40+
--let $include_filename= rpl_partition_info_update.inc
41+
--source include/begin_include_file.inc
42+
43+
--exec $MYSQL_BINLOG --start-position=$binlog_position -vv $binlog_fullpath > $MYSQLTEST_VARDIR/tmp/mysqlbinlog_partition.binlog
44+
--let $assert_file = $MYSQLTEST_VARDIR/tmp/mysqlbinlog_partition.binlog
45+
46+
--let $assert_select= source_partition:.*target_partition:.*
47+
--let $assert_match= `SELECT CONCAT('source_partition: ', REPLACE(REPLACE('$partition_id', ' ', '.*target_partition: '),';', '.*\n[^\n]*source_partition: '))`
48+
--let $assert_text= assert_partition_info: $partition_id
49+
--source include/assert_grep.inc
50+
51+
--let $partition_id=
52+
--remove_file $MYSQLTEST_VARDIR/tmp/mysqlbinlog_partition.binlog
53+
--let $include_filename= rpl_partition_info_update.inc
54+
--source include/end_include_file.inc
55+
56+

0 commit comments

Comments
 (0)