4
4
5
5
import os
6
6
from collections import deque
7
- from concurrent .futures import Future , ThreadPoolExecutor , as_completed
8
7
from dataclasses import dataclass
9
8
from functools import partial
10
9
from itertools import chain
28
27
from ch_backup .exceptions import ClickhouseBackupError
29
28
from ch_backup .logic .backup_manager import BackupManager
30
29
from ch_backup .logic .upload_part_observer import UploadPartObserver
30
+ from ch_backup .storage .async_pipeline .base_pipeline .exec_pool import (
31
+ ThreadExecPool ,
32
+ )
31
33
from ch_backup .util import compare_schema
32
34
33
35
@@ -54,7 +56,7 @@ def backup(
54
56
databases : Sequence [Database ],
55
57
db_tables : Dict [str , list ],
56
58
schema_only : bool ,
57
- freeze_threads : int ,
59
+ multiprocessing_config : Dict ,
58
60
) -> None :
59
61
"""
60
62
Backup tables metadata, MergeTree data and Cloud storage metadata.
@@ -76,7 +78,7 @@ def backup(
76
78
db_tables [db .name ],
77
79
backup_name ,
78
80
schema_only ,
79
- freeze_threads ,
81
+ multiprocessing_config ,
80
82
)
81
83
82
84
def _collect_local_metadata_change_times (
@@ -110,7 +112,7 @@ def _backup(
110
112
tables : Sequence [str ],
111
113
backup_name : str ,
112
114
schema_only : bool ,
113
- freeze_threads : int ,
115
+ multiprocessing_config : Dict ,
114
116
) -> None :
115
117
"""
116
118
Backup single database tables.
@@ -133,33 +135,32 @@ def _backup(
133
135
# Create shadow/increment.txt if not exists manually to avoid
134
136
# race condition with parallel freeze
135
137
context .ch_ctl .create_shadow_increment ()
136
- futures : List [Future ] = []
137
- with ThreadPoolExecutor (max_workers = freeze_threads ) as pool :
138
+ with ThreadExecPool (
139
+ multiprocessing_config .get ("freeze_threads" , 1 )
140
+ ) as pool :
138
141
for table in tables_ :
139
- future = pool .submit (
142
+ pool .submit (
143
+ f'Freeze table "{ table .database } "."{ table .name } "' ,
140
144
TableBackup ._freeze_table ,
141
145
context ,
142
146
db ,
143
147
table ,
144
148
backup_name ,
145
149
schema_only ,
150
+ multiprocessing_config .get ("freeze_partition_threads" , 0 ),
146
151
)
147
- futures .append (future )
148
152
149
- for future in as_completed (futures ):
150
- table_and_create_statement = future .result ()
151
- if table_and_create_statement is not None :
152
- table , create_statement = table_and_create_statement
153
+ for freezed_table in pool .as_completed (keep_going = False ):
154
+ if freezed_table is not None :
153
155
self ._backup_freezed_table (
154
156
context ,
155
157
db ,
156
- table ,
158
+ freezed_table ,
157
159
backup_name ,
158
160
schema_only ,
159
161
change_times ,
160
- create_statement ,
161
162
)
162
- self ._backup_cloud_storage_metadata (context , table )
163
+ self ._backup_cloud_storage_metadata (context , freezed_table )
163
164
164
165
context .backup_layout .wait ()
165
166
context .ch_ctl .remove_freezed_data ()
@@ -174,13 +175,14 @@ def _freeze_table(
174
175
table : Table ,
175
176
backup_name : str ,
176
177
schema_only : bool ,
177
- ) -> Optional [Tuple [Table , bytes ]]:
178
+ freeze_partition_threads : int ,
179
+ ) -> Optional [Table ]:
178
180
"""
179
181
Freeze table and return it with its create statement attached
180
182
"""
181
183
logging .debug ('Trying to freeze "{}"."{}"' , table .database , table .name )
182
-
183
184
create_statement = TableBackup ._load_create_statement_from_disk (table )
185
+ table .create_statement = create_statement or ""
184
186
if not create_statement :
185
187
logging .warning (
186
188
'Skipping table backup for "{}"."{}". Local metadata is empty or absent' ,
@@ -192,7 +194,9 @@ def _freeze_table(
192
194
# Freeze only MergeTree tables
193
195
if not schema_only and table .is_merge_tree ():
194
196
try :
195
- context .ch_ctl .freeze_table (backup_name , table )
197
+ context .ch_ctl .freeze_table (
198
+ backup_name , table , freeze_partition_threads
199
+ )
196
200
except ClickhouseError :
197
201
if context .ch_ctl .does_table_exist (table .database , table .name ):
198
202
logging .error (
@@ -209,10 +213,10 @@ def _freeze_table(
209
213
)
210
214
return None
211
215
212
- return ( table , create_statement )
216
+ return table
213
217
214
218
@staticmethod
215
- def _load_create_statement_from_disk (table : Table ) -> Optional [bytes ]:
219
+ def _load_create_statement_from_disk (table : Table ) -> Optional [str ]:
216
220
"""
217
221
Load a create statement of the table from a metadata file on the disk.
218
222
"""
@@ -224,7 +228,7 @@ def _load_create_statement_from_disk(table: Table) -> Optional[bytes]:
224
228
)
225
229
return None
226
230
try :
227
- return Path (table .metadata_path ).read_bytes ( )
231
+ return Path (table .metadata_path ).read_text ( "utf-8" )
228
232
except OSError as e :
229
233
logging .debug (
230
234
'Cannot load a create statement of the table "{}"."{}": {}' ,
@@ -378,7 +382,6 @@ def _backup_freezed_table(
378
382
backup_name : str ,
379
383
schema_only : bool ,
380
384
change_times : Dict [str , TableMetadataChangeTime ],
381
- create_statement : bytes ,
382
385
) -> None :
383
386
# Check if table metadata was updated
384
387
new_change_time = self ._get_change_time (table .metadata_path )
@@ -400,7 +403,7 @@ def _backup_freezed_table(
400
403
)
401
404
# Backup table metadata
402
405
context .backup_layout .upload_table_create_statement (
403
- context .backup_meta , db , table , create_statement
406
+ context .backup_meta , db , table
404
407
)
405
408
# Backup table data
406
409
if not schema_only :
0 commit comments