Skip to content

Commit 9b3ce0f

Browse files
dentiny and JelteF authored
BIT/VARBIT type conversion between pg and duckdb (#628)
Resolves #597 --------- Co-authored-by: Jelte Fennema-Nio <[email protected]>
1 parent b281700 commit 9b3ce0f

File tree

10 files changed

+283
-3
lines changed

10 files changed

+283
-3
lines changed

Diff for: docs/types.md

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Able to read many [data types](https://www.postgresql.org/docs/current/datatype.
66
- Floating point types (`real`, `double precision`)
77
- `numeric` (might get converted to `double precision` internally; see known limitations below for details)
88
- `text`/`varchar`/`bpchar`
9+
- `bit`-related types, covering both fixed-length (`bit`) and variable-length (`bit varying`/`varbit`) bit strings
910
- `bytea`/`blob`
1011
- `timestamp`/`timestamptz`/`date`/`interval`/`timestamp_ns`/`timestamp_ms`/`timestamp_s`
1112
- `boolean`

Diff for: include/pgduckdb/pg/types.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,6 @@ bool IsDomainType(Oid type_oid);
88
bool IsArrayDomainType(Oid type_oid);
99
Oid GetBaseDuckColumnType(Oid attribute_type_oid);
1010
Datum StringToNumeric(const char *str);
11+
Datum StringToVarbit(const char *str);
12+
const char *VarbitToString(Datum pg_varbit);
1113
} // namespace pgduckdb::pg

Diff for: src/pg/types.cpp

+22
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,26 @@ StringToNumeric(const char *str) {
6767
return PostgresFunctionGuard(StringToNumeric_C, str);
6868
}
6969

70+
// Parses the textual form of a bit string (`str`) into a postgres VARBIT datum by calling the
// postgres input function `varbit_in` through DirectFunctionCall3.
// This is the raw helper; StringToVarbit runs it through PostgresFunctionGuard.
static Datum
71+
StringToVarbit_C(const char *str) {
72+
// Arguments: (1) the C string to parse, (2) the type OID (passed as an Oid datum, hence `typelem`),
// (3) the type modifier. NOTE(review): typmod -1 presumably means "no declared length limit" -- confirm.
Datum pg_varbit = DirectFunctionCall3(varbit_in, CStringGetDatum(str), /*typelem=*/ObjectIdGetDatum(VARBITOID),
73+
/*typmod=*/Int32GetDatum(-1));
74+
return pg_varbit;
75+
}
76+
77+
// Converts the textual form of a bit string (`str`) into a postgres VARBIT datum.
// Runs StringToVarbit_C through PostgresFunctionGuard rather than calling it directly.
// NOTE(review): PostgresFunctionGuard presumably turns postgres errors into C++ exceptions -- confirm.
Datum
78+
StringToVarbit(const char *str) {
79+
return PostgresFunctionGuard(StringToVarbit_C, str);
80+
}
81+
82+
// Renders a postgres bit string datum (`pg_bitstring`) as text by calling the postgres output
// function `varbit_out` through DirectFunctionCall1 and unwrapping the result as a C string.
// NOTE(review): the returned buffer is presumably allocated by postgres in the current memory
// context; this function does not free it -- confirm the lifetime expected by callers.
static const char *
83+
VarbitToString_C(Datum pg_bitstring) {
84+
return DatumGetCString(DirectFunctionCall1(varbit_out, pg_bitstring));
85+
}
86+
87+
// Returns the textual form of a postgres bit string datum (`pg_bitstring`).
// Runs VarbitToString_C through PostgresFunctionGuard rather than calling it directly.
// NOTE(review): PostgresFunctionGuard presumably turns postgres errors into C++ exceptions -- confirm.
const char *
88+
VarbitToString(Datum pg_bitstring) {
89+
return PostgresFunctionGuard(VarbitToString_C, pg_bitstring);
90+
}
91+
7092
} // namespace pgduckdb::pg

Diff for: src/pgduckdb_types.cpp

+65-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
#include "duckdb.hpp"
22
#include "duckdb/common/shared_ptr.hpp"
33
#include "duckdb/common/extra_type_info.hpp"
4-
#include "duckdb/common/types/uuid.hpp"
4+
#include "duckdb/common/types/bit.hpp"
55
#include "duckdb/common/types/blob.hpp"
6+
#include "duckdb/common/types/uuid.hpp"
67

78
#include "pgduckdb/pgduckdb_types.hpp"
89
#include "pgduckdb/pgduckdb_metadata_cache.hpp"
@@ -30,6 +31,7 @@ extern "C" {
3031
#include "utils/syscache.h"
3132
#include "utils/timestamp.h"
3233
#include "utils/uuid.h"
34+
#include "utils/varbit.h"
3335
}
3436

3537
#include "pgduckdb/pgduckdb_detoast.hpp"
@@ -158,6 +160,18 @@ struct DecimalConversionDouble {
158160
}
159161
};
160162

163+
// Utility that converts a duckdb `BIT` value into a postgres bit string datum.
164+
// Postgres has two candidate target types, `BITOID` and `VARBITOID`; we always produce `VARBITOID` for generality.
165+
static Datum
166+
ConvertVarbitDatum(const duckdb::Value &value) {
167+
// Render the duckdb value as text; this text is what gets handed to the postgres parser below.
const std::string value_str = value.ToString();
168+
169+
// We go through the postgres text conversion function instead of copying bytes ourselves: bit strings involve
170+
// padding, duckdb and postgres handle that padding differently, so a plain memcpy of the bits is non-trivial.
171+
Datum pg_varbit = pgduckdb::pg::StringToVarbit(value_str.c_str());
172+
return pg_varbit;
173+
}
174+
161175
static inline bool
162176
ValidDate(duckdb::date_t dt) {
163177
if (dt == duckdb::date_t::infinity() || dt == duckdb::date_t::ninfinity())
@@ -522,6 +536,17 @@ DatumGetInterval(Datum value) {
522536
return duck_interval;
523537
}
524538

539+
// Returns the textual form of a postgres BIT/VARBIT datum (`value`) as a std::string.
static std::string
540+
DatumGetBitString(Datum value) {
541+
// We go through the postgres text conversion function instead of parsing
542+
// the bytes ourselves: bit strings involve padding, and duckdb and postgres
543+
// handle that padding differently, so a plain memcpy of the bits is non-trivial.
544+
//
545+
// NOTE: We use VarbitToString here, because BIT and VARBIT are both stored
546+
// internally as a VARBIT in postgres.
// NOTE(review): the char * returned by VarbitToString is copied into the std::string and is
// never freed here -- presumably it is reclaimed with the postgres memory context; confirm.
547+
return std::string(pgduckdb::pg::VarbitToString(value));
548+
}
549+
525550
static duckdb::dtime_t
526551
DatumGetTime(Datum value) {
527552
const TimeADT pg_time = DatumGetTimeADT(value);
@@ -673,6 +698,19 @@ struct PostgresTypeTraits<INTERVALOID> {
673698
}
674699
};
675700

701+
// BIT type: type traits for postgres VARBIT values (typlen / typbyval / typalign plus the
// duckdb -> postgres datum conversion).
// NOTE(review): presumably consumed through PostgresOIDMapping<VARBITOID> when building arrays -- confirm.
702+
template <>
703+
struct PostgresTypeTraits<VARBITOID> {
704+
// NOTE(review): -1 presumably marks a variable-length type, matching the pass-by-reference flag below -- confirm.
static constexpr int16_t typlen = -1;
705+
static constexpr bool typbyval = false;
706+
static constexpr char typalign = 'i';
707+
708+
// Converts a duckdb value to a postgres datum by delegating to ConvertVarbitDatum.
static inline Datum
709+
ToDatum(const duckdb::Value &val) {
710+
return ConvertVarbitDatum(val);
711+
}
712+
};
713+
676714
// TIME type
677715
template <>
678716
struct PostgresTypeTraits<TIMEOID> {
@@ -803,6 +841,7 @@ using DateArray = PODArray<PostgresOIDMapping<DATEOID>>;
803841
using TimestampArray = PODArray<PostgresOIDMapping<TIMESTAMPOID>>;
804842
using TimestampTzArray = PODArray<PostgresOIDMapping<TIMESTAMPTZOID>>;
805843
using IntervalArray = PODArray<PostgresOIDMapping<INTERVALOID>>;
844+
using BitArray = PODArray<PostgresOIDMapping<VARBITOID>>;
806845
using TimeArray = PODArray<PostgresOIDMapping<TIMEOID>>;
807846
using TimeTzArray = PODArray<PostgresOIDMapping<TIMETZOID>>;
808847
using UUIDArray = PODArray<PostgresOIDMapping<UUIDOID>>;
@@ -977,6 +1016,11 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
9771016
Oid oid = slot->tts_tupleDescriptor->attrs[col].atttypid;
9781017

9791018
switch (oid) {
1019+
case BITOID:
1020+
case VARBITOID: {
1021+
slot->tts_values[col] = ConvertVarbitDatum(value);
1022+
break;
1023+
}
9801024
case BOOLOID:
9811025
slot->tts_values[col] = ConvertBoolDatum(value);
9821026
break;
@@ -1088,6 +1132,11 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
10881132
ConvertDuckToPostgresArray<IntervalArray>(slot, value, col);
10891133
break;
10901134
}
1135+
case BITARRAYOID:
1136+
case VARBITARRAYOID: {
1137+
ConvertDuckToPostgresArray<BitArray>(slot, value, col);
1138+
break;
1139+
}
10911140
case TIMEARRAYOID: {
10921141
ConvertDuckToPostgresArray<TimeArray>(slot, value, col);
10931142
break;
@@ -1189,6 +1238,11 @@ ConvertPostgresToBaseDuckColumnType(Form_pg_attribute &attribute) {
11891238
case INTERVALOID:
11901239
case INTERVALARRAYOID:
11911240
return duckdb::LogicalTypeId::INTERVAL;
1241+
case BITOID:
1242+
case BITARRAYOID:
1243+
case VARBITOID:
1244+
case VARBITARRAYOID:
1245+
return duckdb::LogicalTypeId::BIT;
11921246
case TIMEOID:
11931247
case TIMEARRAYOID:
11941248
return duckdb::LogicalTypeId::TIME;
@@ -1307,6 +1361,8 @@ GetPostgresArrayDuckDBType(const duckdb::LogicalType &type) {
13071361
return TIMESTAMPTZARRAYOID;
13081362
case duckdb::LogicalTypeId::INTERVAL:
13091363
return INTERVALARRAYOID;
1364+
case duckdb::LogicalTypeId::BIT:
1365+
return VARBITARRAYOID;
13101366
case duckdb::LogicalTypeId::TIME:
13111367
return TIMEARRAYOID;
13121368
case duckdb::LogicalTypeId::TIME_TZ:
@@ -1365,6 +1421,8 @@ GetPostgresDuckDBType(const duckdb::LogicalType &type) {
13651421
return TIMESTAMPTZOID;
13661422
case duckdb::LogicalTypeId::INTERVAL:
13671423
return INTERVALOID;
1424+
case duckdb::LogicalTypeId::BIT:
1425+
return VARBITOID;
13681426
case duckdb::LogicalTypeId::TIME:
13691427
return TIMEOID;
13701428
case duckdb::LogicalTypeId::TIME_TZ:
@@ -1607,6 +1665,9 @@ ConvertPostgresParameterToDuckValue(Datum value, Oid postgres_type) {
16071665
duckdb::timestamp_tz_t(DatumGetTimestampTz(value) + PGDUCKDB_DUCK_TIMESTAMP_OFFSET));
16081666
case INTERVALOID:
16091667
return duckdb::Value::INTERVAL(DatumGetInterval(value));
1668+
case BITOID:
1669+
case VARBITOID:
1670+
return duckdb::Value::BIT(DatumGetBitString(value));
16101671
case TIMEOID:
16111672
return duckdb::Value::TIME(DatumGetTime(value));
16121673
case TIMETZOID:
@@ -1669,6 +1730,9 @@ ConvertPostgresToDuckValue(Oid attr_type, Datum value, duckdb::Vector &result, i
16691730
case duckdb::LogicalTypeId::INTERVAL:
16701731
Append<duckdb::interval_t>(result, DatumGetInterval(value), offset);
16711732
break;
1733+
case duckdb::LogicalTypeId::BIT:
1734+
Append<duckdb::bitstring_t>(result, duckdb::Bit::ToBit(DatumGetBitString(value)), offset);
1735+
break;
16721736
case duckdb::LogicalTypeId::TIME:
16731737
Append<duckdb::dtime_t>(result, DatumGetTime(value), offset);
16741738
break;

Diff for: test/regression/expected/array_type_support.out

+74
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,40 @@ SELECT * FROM varchar_array_1d;
147147
{}
148148
(4 rows)
149149

150+
-- VARBIT (single dimension)
151+
CREATE TABLE varbit_array_1d(a VARBIT[]);
152+
INSERT INTO varbit_array_1d SELECT CAST(a as VARBIT[]) FROM (VALUES
153+
('{B1010, B10100011}'),
154+
(NULL),
155+
('{B1010001011, NULL, B10101010101}'),
156+
('{}')
157+
) t(a);
158+
SELECT * FROM varbit_array_1d;
159+
a
160+
-------------------------------
161+
{1010,10100011}
162+
163+
{1010001011,NULL,10101010101}
164+
{}
165+
(4 rows)
166+
167+
-- BIT (single dimension)
168+
CREATE TABLE bit_array_1d(a BIT(4)[]);
169+
INSERT INTO bit_array_1d SELECT CAST(a as BIT(4)[]) FROM (VALUES
170+
('{B1010, B0101}'),
171+
(NULL),
172+
('{B1010, NULL, B0111}'),
173+
('{}')
174+
) t(a);
175+
SELECT * FROM bit_array_1d;
176+
a
177+
------------------
178+
{1010,0101}
179+
180+
{1010,NULL,0111}
181+
{}
182+
(4 rows)
183+
150184
-- INTERVAL (single dimension)
151185
CREATE TABLE interval_array_1d(a INTERVAL[]);
152186
INSERT INTO interval_array_1d (a) VALUES (ARRAY['2 years 5 months 1 day 3 hours 30 minutes 5 seconds', '5 days 5 hours']::INTERVAL[]);
@@ -378,6 +412,42 @@ SELECT * FROM varchar_array_2d;
378412
{{some,strings},{NULL,last}}
379413
(5 rows)
380414

415+
-- VARBIT (two dimensions)
416+
CREATE TABLE varbit_array_2d(a VARBIT[][]);
417+
INSERT INTO varbit_array_2d VALUES
418+
('{{B1010,B10100011},{B1010101,B101010101}}'),
419+
('{{B101010101,B10101010101,B1010101010101},{B101010101010101,B10101010101010101,B1010101010101010101}}'),
420+
(NULL),
421+
('{}'),
422+
('{{B101010101,B10101010101},{NULL,B1010101010101}}');
423+
SELECT * FROM varbit_array_2d;
424+
a
425+
-------------------------------------------------------------------------------------------------
426+
{{1010,10100011},{1010101,101010101}}
427+
{{101010101,10101010101,1010101010101},{101010101010101,10101010101010101,1010101010101010101}}
428+
429+
{}
430+
{{101010101,10101010101},{NULL,1010101010101}}
431+
(5 rows)
432+
433+
CREATE TABLE bit_array_2d(a BIT(4)[][]);
434+
INSERT INTO bit_array_2d SELECT CAST(a as BIT(4)[][]) FROM (VALUES
435+
('{{B1010, B0101},{B0000, B0111}}'),
436+
('{{B1010, B0101, B1111},{B1010, B0101, B0000}}'),
437+
(NULL),
438+
('{}'),
439+
('{{B1010, NULL},{B0111, B0000}}')
440+
) t(a);
441+
SELECT * FROM bit_array_2d;
442+
a
443+
-------------------------------------
444+
{{1010,0101},{0000,0111}}
445+
{{1010,0101,1111},{1010,0101,0000}}
446+
447+
{}
448+
{{1010,NULL},{0111,0000}}
449+
(5 rows)
450+
381451
-- BYTEA (single dimension)
382452
CREATE TABLE bytea_array_1d (a bytea[]);
383453
INSERT INTO bytea_array_1d (a)
@@ -540,6 +610,8 @@ DROP TABLE bool_array_1d;
540610
DROP TABLE char_array_1d;
541611
DROP TABLE smallint_array_1d;
542612
DROP TABLE varchar_array_1d;
613+
DROP TABLE varbit_array_1d;
614+
DROP TABLE bit_array_1d;
543615
DROP TABLE interval_array_1d;
544616
DROP TABLE time_array_1d;
545617
DROP TABLE timetz_array_1d;
@@ -554,6 +626,8 @@ DROP TABLE regclass_array_1d;
554626
DROP TABLE char_array_2d;
555627
DROP TABLE smallint_array_2d;
556628
DROP TABLE varchar_array_2d;
629+
DROP TABLE varbit_array_2d;
630+
DROP TABLE bit_array_2d;
557631
DROP TABLE interval_array_2d;
558632
DROP TABLE time_array_2d;
559633
DROP TABLE timetz_array_2d;

Diff for: test/regression/expected/test_all_types.out

+3-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ SET bytea_output = 'escape';
33
SELECT * FROM duckdb.query($$
44
FROM test_all_types()
55
SELECT * exclude(
6-
bit,
76
small_enum,
87
medium_enum,
98
large_enum,
@@ -50,6 +49,7 @@ uuid | 00000000-0000-0000-0000-000000000000
5049
interval | @ 0
5150
varchar | 🦆🦆🦆🦆🦆🦆
5251
blob | thisisalongblob\000withnullbytes
52+
bit | 0010001001011100010101011010111
5353
int_array | {}
5454
double_array | {}
5555
date_array | {}
@@ -85,6 +85,7 @@ uuid | ffffffff-ffff-ffff-ffff-ffffffffffff
8585
interval | @ 83 years 3 mons 999 days 16 mins 39.999999 secs
8686
varchar | goo
8787
blob | \000\000\000a
88+
bit | 10101
8889
int_array | {42,999,NULL,NULL,-42}
8990
double_array | {42,NaN,Infinity,-Infinity,NULL,-42}
9091
date_array | {01-01-1970,infinity,-infinity,NULL,05-12-2022}
@@ -120,6 +121,7 @@ uuid |
120121
interval |
121122
varchar |
122123
blob |
124+
bit |
123125
int_array |
124126
double_array |
125127
date_array |

Diff for: test/regression/expected/type_support.out

+49
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,52 @@ SELECT * FROM interval_tbl WHERE a = '5 day 5 hours'::INTERVAL;
134134
@ 5 days 5 hours
135135
(1 row)
136136

137+
-- VARBIT
138+
CREATE TABLE varbit_tbl(a VARBIT);
139+
-- Insert a few kinds of bitstrings: (1) less than 8 bits; (2) equal to 8 bits; (3) larger than 8 bits.
140+
INSERT INTO varbit_tbl SELECT CAST(a AS VARBIT) FROM (VALUES (B'1010'::VARBIT), (B'10100011'::VARBIT), (B'1010001011'::VARBIT), (NULL)) t(a);
141+
SELECT * FROM varbit_tbl;
142+
a
143+
------------
144+
1010
145+
10100011
146+
1010001011
147+
148+
(4 rows)
149+
150+
CREATE TABLE varbit20_tbl(a BIT VARYING(20));
151+
-- Insert a few kinds of bitstrings: (1) less than 8 bits; (2) equal to 8 bits; (3) larger than 8 bits.
152+
INSERT INTO varbit20_tbl SELECT CAST(a AS VARBIT) FROM (VALUES (B'1010'::VARBIT), (B'10100011'::VARBIT), (B'1010001011'::VARBIT), (NULL)) t(a);
153+
SELECT * FROM varbit20_tbl;
154+
a
155+
------------
156+
1010
157+
10100011
158+
1010001011
159+
160+
(4 rows)
161+
162+
-- BIT
163+
CREATE TABLE bit_tbl(a BIT(4));
164+
INSERT INTO bit_tbl VALUES (B'1010'), (B'0101'), (NULL);
165+
SELECT * FROM bit_tbl;
166+
a
167+
------
168+
1010
169+
0101
170+
171+
(3 rows)
172+
173+
CREATE TABLE bit14_tbl(a BIT(14));
174+
INSERT INTO bit14_tbl VALUES (B'10101010101010'), (B'11111111111111'), (NULL);
175+
SELECT * FROM bit14_tbl;
176+
a
177+
----------------
178+
10101010101010
179+
11111111111111
180+
181+
(3 rows)
182+
137183
-- TIME
138184
CREATE TABLE time_tbl(a TIME);
139185
INSERT INTO time_tbl SELECT CAST(a AS TIME) FROM (VALUES ('13:45:30'::TIME), ('08:15:00'::TIME), (NULL)) t(a);
@@ -446,6 +492,9 @@ DROP TABLE varchar_tbl;
446492
DROP TABLE text_tbl;
447493
DROP TABLE date_tbl;
448494
DROP TABLE interval_tbl;
495+
DROP TABLE varbit_tbl;
496+
DROP TABLE varbit20_tbl;
497+
DROP TABLE bit_tbl;
449498
DROP TABLE time_tbl;
450499
DROP TABLE timetz_tbl;
451500
DROP TABLE timestamp_tbl;

0 commit comments

Comments
 (0)