Skip to content

Commit 6fbb257

Browse files
committed
make unions 'chunked'
1 parent efb68a2 commit 6fbb257

File tree

3 files changed

+44
-19
lines changed

3 files changed

+44
-19
lines changed

lang/types.md

+8-3
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,15 @@ enum Type {
5454
count: BigInt,
5555
}
5656
Union {
57-
/// Fields *may* overlap.
57+
/// Fields *may* overlap. Fields only exist for field access place projections,
58+
/// they are irrelevant for the representation relation.
5859
fields: Fields,
59-
/// The total size of the type can indicate trailing padding.
60-
/// Must be large enough to contain all fields.
60+
/// A union can be split into multiple "chunks", where only the data inside those chunks is
61+
/// preserved, and data between chunks is lost (like padding in a struct).
62+
/// This is necessary to model the behavior of some `repr(C)` unions, see
63+
/// <https://github.com/rust-lang/unsafe-code-guidelines/issues/156> for details.
64+
chunks: List<(Size, Size)>, // (offset, length) for each chunk.
65+
/// The total size of the union. This can indicate padding after the last chunk.
6166
size: Size,
6267
},
6368
Enum {

lang/values.md

+26-14
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ enum Value {
2828
idx: BigInt,
2929
data: Value,
3030
},
31-
/// A "bag of bytes", used for unions.
32-
Bytes(List<AbstractByte>),
31+
/// Unions are represented as "lists of chunks", where each chunk is just a raw list of bytes.
32+
Union(List<List<AbstractByte>>),
3333
}
3434
```
3535

@@ -192,19 +192,19 @@ For simplicity, we only define pairs for now.
192192
impl Type {
193193
fn decode(Tuple { fields: [field1, field2], size }: Self, bytes: List<AbstractByte>) -> Option<Value> {
194194
if bytes.len() != size { yeet!(); }
195-
let (size1, type1) = field1;
196-
let val1 = type1.decode(bytes[size1..][..type1.size()]);
197-
let (size2, type2) = field2;
198-
let val2 = type1.decode(bytes[size2..][..type2.size()]);
195+
let (offset1, type1) = field1;
196+
let val1 = type1.decode(bytes[offset1..][..type1.size()]);
197+
let (offset2, type2) = field2;
198+
let val2 = type2.decode(bytes[offset2..][..type2.size()]);
199199
Value::Tuple([val1, val2])
200200
}
201201
fn encode(Tuple { fields: [field1, field2], size }: Self, val: Value) -> List<AbstractByte> {
202202
let Value::Tuple([val1, val2]) = val else { panic!() };
203203
let mut bytes = [AbstractByte::Uninit; size];
204-
let (size1, type1) = field1;
205-
bytes[size1..][..type1.size()] = type1.encode(val1);
206-
let (size2, type2) = field2;
207-
bytes[size2..][..type2.size()] = type2.encode(val2);
204+
let (offset1, type1) = field1;
205+
bytes[offset1..][..type1.size()] = type1.encode(val1);
206+
let (offset2, type2) = field2;
207+
bytes[offset2..][..type2.size()] = type2.encode(val2);
208208
bytes
209209
}
210210
}
@@ -224,12 +224,24 @@ A union simply stores the bytes directly, no high-level interpretation of data h
224224

225225
```rust
226226
impl Type {
227-
fn decode(Union { size, .. }: Self, bytes: List<AbstractByte>) -> Option<Value> {
227+
fn decode(Union { size, chunks, .. }: Self, bytes: List<AbstractByte>) -> Option<Value> {
228228
if bytes.len() != size { yeet!(); }
229-
Value::Bytes(bytes)
229+
let mut chunk_data = list![];
230+
// Store the data from each chunk.
231+
for (offset, size) in chunks {
232+
chunk_data.push(bytes[offset..][..size]);
233+
}
234+
Value::Union(chunk_data)
230235
}
231-
fn encode(Union { size, .. }: Self, value: Value) -> List<AbstractByte> {
232-
let Value::Bytes(bytes) = val else { panic!() };
236+
fn encode(Union { size, chunks, .. }: Self, value: Value) -> List<AbstractByte> {
237+
let Value::Union(chunk_data) = value else { panic!() };
238+
assert_eq!(chunk_data.len(), chunks.len());
239+
let mut bytes = [AbstractByte::Uninit; size];
240+
// Restore the data from each chunk.
241+
for ((offset, size), data) in chunks.iter().zip(chunk_data.iter()) {
242+
assert_eq!(data.len(), size);
243+
bytes[offset..][..size] = data;
244+
}
233245
bytes
234246
}
235247
}

lang/well-formed.md

+10-2
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,20 @@ impl Type {
6161
elem.check()?;
6262
elem.size().checked_mul(count)?;
6363
}
64-
Union { fields, size } => {
65-
// These may overlap, but they must all fit the size.
64+
Union { fields, size, chunks } => {
65+
// The fields may overlap, but they must all fit the size.
6666
for (offset, type) in fields {
6767
type.check()?;
6868
ensure(size >= offset.checked_add(type.size())?)?;
6969
}
70+
// The chunks must be sorted by offset and disjoint.
71+
let mut last_end = Size::ZERO;
72+
for (offset, size) in chunks {
73+
ensure(offset >= last_end)?;
74+
last_end = offset.checked_add(size)?;
75+
}
76+
// And they must all fit into the size.
77+
ensure(size >= last_end)?;
7078
}
7179
Enum { variants, size, tag_encoding: _ } => {
7280
for variant in variants {

0 commit comments

Comments
 (0)