From 79a6ae47c759c649d20dbe9beb9bab9cd981e4ee Mon Sep 17 00:00:00 2001 From: wmttom <wmttom@gmail.com> Date: Mon, 14 Jul 2014 16:58:55 +0800 Subject: [PATCH 1/4] Fix 'utf8mb4' decode error. Fix decode error,if column.character_set_name == "utf8mb4". --- pymysqlreplication/row_event.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pymysqlreplication/row_event.py b/pymysqlreplication/row_event.py index eb7e4721..8ea9c6bc 100644 --- a/pymysqlreplication/row_event.py +++ b/pymysqlreplication/row_event.py @@ -166,6 +166,13 @@ def __read_string(self, size, column): str = str.decode(column.character_set_name) return str + def __read_string(self, size, column): + str = self.packet.read_length_coded_pascal_string(size) + if column.character_set_name is not None: + character_set_name = "utf8" if column.character_set_name == "utf8mb4" else column.character_set_name + str = str.decode(character_set_name) + return str + def __read_bit(self, column): """Read MySQL BIT type""" resp = "" From ce5a64a00712e13d0dc1513e132a083cd05e8968 Mon Sep 17 00:00:00 2001 From: wmttom <wmttom@gmail.com> Date: Mon, 14 Jul 2014 17:14:16 +0800 Subject: [PATCH 2/4] Support 'utf8mb4' charset --- pymysqlreplication/binlogstream.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pymysqlreplication/binlogstream.py b/pymysqlreplication/binlogstream.py index ecc02dce..1e5a701b 100644 --- a/pymysqlreplication/binlogstream.py +++ b/pymysqlreplication/binlogstream.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import sys import pymysql import pymysql.cursors import struct @@ -32,7 +33,12 @@ def __init__(self, connection_settings={}, resume_stream=False, log_pos: Set replication start log pos """ self.__connection_settings = connection_settings - self.__connection_settings["charset"] = "utf8" + # python UCS-4 use 'utf8mb4' charset + _sys_maxunicode = sys.maxunicode + if _sys_maxunicode == 65535: + self.__connection_settings["charset"] = "utf8" + elif _sys_maxunicode == 1114111: + self.__connection_settings["charset"] = "utf8mb4" self.__connected_stream = False self.__connected_ctl = False From 5dc211a9f8330f639d836da7a9afd68d06551611 Mon Sep 17 00:00:00 2001 From: tom <wmttom@gmail.com> Date: Wed, 11 Mar 2015 15:32:14 +0800 Subject: [PATCH 3/4] make charset configurable --- pymysqlreplication/binlogstream.py | 8 ++------ pymysqlreplication/row_event.py | 11 +++-------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/pymysqlreplication/binlogstream.py b/pymysqlreplication/binlogstream.py index a9e7df63..b9ef3b1e 100644 --- a/pymysqlreplication/binlogstream.py +++ b/pymysqlreplication/binlogstream.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -import sys + import pymysql import struct @@ -54,12 +54,8 @@ def __init__(self, connection_settings, server_id, resume_stream=False, freeze_schema: If true do not support ALTER TABLE. It's faster. """ self.__connection_settings = connection_settings - # python UCS-4 use 'utf8mb4' charset - _sys_maxunicode = sys.maxunicode - if _sys_maxunicode == 65535: + if not connection_settings.get("charset"): self.__connection_settings["charset"] = "utf8" - elif _sys_maxunicode == 1114111: - self.__connection_settings["charset"] = "utf8mb4" self.__connected_stream = False self.__connected_ctl = False diff --git a/pymysqlreplication/row_event.py b/pymysqlreplication/row_event.py index 3c235497..be82ea6f 100644 --- a/pymysqlreplication/row_event.py +++ b/pymysqlreplication/row_event.py @@ -191,16 +191,11 @@ def __add_fsp_to_time(self, time, column): def __read_string(self, size, column): string = self.packet.read_length_coded_pascal_string(size) if column.character_set_name is not None: - string = string.decode(column.character_set_name) + character_set_name = "utf8" if column.character_set_name == "utf8mb4" \ + else column.character_set_name + string = string.decode(character_set_name) return string - def __read_string(self, size, column): - str = self.packet.read_length_coded_pascal_string(size) - if column.character_set_name is not None: - character_set_name = "utf8" if column.character_set_name == "utf8mb4" else column.character_set_name - str = str.decode(character_set_name) - return str - def __read_bit(self, column): """Read MySQL BIT type""" resp = "" From 8e5d19e2f3456d0352728b4a33c4be7ee598fcfc Mon Sep 17 00:00:00 2001 From: tom <wmttom@gmail.com> Date: Wed, 11 Mar 2015 15:45:07 +0800 Subject: [PATCH 4/4] delete blank line --- pymysqlreplication/binlogstream.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymysqlreplication/binlogstream.py b/pymysqlreplication/binlogstream.py index b9ef3b1e..e5964a01 100644 --- a/pymysqlreplication/binlogstream.py +++ b/pymysqlreplication/binlogstream.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- - import pymysql import struct