Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added the SnapshotMetadata service. #551

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 188 additions & 0 deletions csi.proto
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,16 @@ service GroupController {
}
}

// SnapshotMetadata groups the server-streaming RPCs that return block
// metadata for snapshots. The service is flagged as alpha.
service SnapshotMetadata {
  option (alpha_service) = true;

  // Returns a stream of GetMetadataAllocatedResponse messages that,
  // cumulatively, describe the allocated data ranges of the snapshot
  // identified in the request.
  rpc GetMetadataAllocated(GetMetadataAllocatedRequest)
    returns (stream GetMetadataAllocatedResponse) {}

  // Returns a stream of GetMetadataDeltaResponse messages that,
  // cumulatively, describe the data ranges that have changed between
  // the base and target snapshots identified in the request.
  rpc GetMetadataDelta(GetMetadataDeltaRequest)
    returns (stream GetMetadataDeltaResponse) {}
}

service Node {
rpc NodeStageVolume (NodeStageVolumeRequest)
returns (NodeStageVolumeResponse) {}
Expand Down Expand Up @@ -220,6 +230,14 @@ message PluginCapability {
// well as specific RPCs as indicated by
// GroupControllerGetCapabilities.
GROUP_CONTROLLER_SERVICE = 3 [(alpha_enum_value) = true];

// SNAPSHOT_METADATA_SERVICE indicates that the Plugin provides
// RPCs to retrieve metadata on the allocated blocks of a single
// snapshot, or the changed blocks between a pair of snapshots of
// the same block volume.
// The presence of this capability determines whether the CO will
// attempt to invoke the OPTIONAL SnapshotMetadata service RPCs.
SNAPSHOT_METADATA_SERVICE = 4 [(alpha_enum_value) = true];
}
Type type = 1;
}
Expand Down Expand Up @@ -1913,3 +1931,173 @@ message GetVolumeGroupSnapshotResponse {
// This field is REQUIRED
VolumeGroupSnapshot group_snapshot = 1;
}
// BlockMetadata specifies a data range as a byte offset and a size.
message BlockMetadata {
  // The zero based byte position at which the data range starts,
  // measured from the start of the volume or snapshot.
  // This field is REQUIRED.
  int64 byte_offset = 1;

  // The size of the data range.
  // size_bytes MUST be greater than zero.
  // This field is REQUIRED.
  int64 size_bytes = 2;
}
// BlockMetadataType specifies the style used in a BlockMetadata
// sequence returned by the SnapshotMetadata RPCs.
enum BlockMetadataType {
  // UNKNOWN is the zero (default) value of this enum.
  UNKNOWN = 0;

  // FIXED_LENGTH indicates that data ranges are returned in fixed
  // size blocks.
  FIXED_LENGTH = 1;

  // VARIABLE_LENGTH indicates that data ranges are returned in
  // potentially variable sized extents.
  VARIABLE_LENGTH = 2;
}
// GetMetadataAllocatedRequest solicits metadata on the allocated
// blocks of a snapshot, i.e. it identifies the data ranges that hold
// valid data because they were the target of some previous write
// operation on the volume.
message GetMetadataAllocatedRequest {
  // The identifier of the snapshot.
  // This field is REQUIRED.
  string snapshot_id = 1;

  // The zero based starting byte position in the volume snapshot from
  // which the result should be computed. It is intended to be used to
  // continue a previously interrupted call.
  // When continuing an interrupted operation, the CO SHOULD set this
  // to the offset of the byte position immediately after the last
  // byte of the last data range received; otherwise the CO SHOULD set
  // it to zero.
  // The SP MUST ensure that the returned response stream does not
  // contain BlockMetadata tuples that end before the requested
  // starting_offset: i.e. if S is the requested starting_offset, and
  // B0 is block_metadata[0] of the first message in the response
  // stream, then (S < B0.byte_offset + B0.size_bytes) MUST be true.
  // This field is REQUIRED.
  int64 starting_offset = 2;

  // If non-zero, the maximum number of tuples to be returned in each
  // GetMetadataAllocatedResponse message returned by the RPC stream.
  // If zero, the plugin will determine an appropriate value.
  // The plugin is always free to send fewer tuples than the requested
  // value.
  // This field is OPTIONAL.
  int32 max_results = 3;

  // Secrets required by plugin to complete the request.
  // This field is OPTIONAL. Refer to the `Secrets Requirements`
  // section on how to use this field.
  map<string, string> secrets = 4 [(csi_secret) = true];
}

// GetMetadataAllocatedResponse messages are returned in a gRPC stream.
// Cumulatively, they provide information on the allocated data
// ranges in the snapshot.
message GetMetadataAllocatedResponse {
// This specifies the style used in the BlockMetadata sequence.
// This value MUST be the same in all such messages returned by
// the stream.
// If block_metadata_type is FIXED_LENGTH, then the size_bytes field
// of each message in the block_metadata list MUST be constant.
// This field is REQUIRED.
BlockMetadataType block_metadata_type = 1;

// This returns the capacity of the underlying volume in bytes.
// This value MUST be the same in all such messages returned by
// the stream.
// This field is REQUIRED.
int64 volume_capacity_bytes = 2;

// This is a list of data range tuples.
// If the value of max_results in the GetMetadataAllocatedRequest
// message is greater than zero, then the number of entries in this
// list MUST be less than or equal to that value.
// The SP MUST respect the value of starting_offset in the request.
// The byte_offset fields of adjacent BlockMetadata messages
// MUST be strictly increasing and messages MUST NOT overlap:
// i.e. for any two BlockMetadata messages, A and B, if A is returned
// before B, then (A.byte_offset + A.size_bytes <= B.byte_offset)
// MUST be true.
// This MUST also be true if A and B are from block_metadata lists in
// different GetMetadataAllocatedResponse messages in the gRPC stream.
// This field is OPTIONAL.
repeated BlockMetadata block_metadata = 3;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
repeated BlockMetadata block_metadata = 3;
repeated BlockMetadata block_metadata = 3;
// Indicates there are no more allocated blocks in the list.
// This field is REQUIRED.
bool end_of_list = 4;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I understand it, a gRPC stream returns an EOF on proper termination and an error otherwise. Would that not suffice?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My concern here is when max_results is specified, and you get an EOF, how do you know if you need to call it one more time? Or even if max_results is set to zero, can the SP decide to send less than all the results in one stream?

During my review I was going to add like a whole paragraph explaining the relationship between the max_results field and the number of elements in the block_metadata field, and I decided it was too confusing and it would be better to make it explicit.

The core of the problem is that we've introduced BOTH pagination and streaming and this means the client can't rely on the stream ending to indicate that there are no more pages.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The core of the problem is that we've introduced BOTH pagination and streaming and this means the client can't rely on the stream ending to indicate that there are no more pages.

Hmm ... I'm not sure I understand your confusion! Perhaps we can use this snippet of Go client prototype code (written by @PrasadG193) to dissect the problem?

	c.initGRPCClient(snapMetaSvc.Spec.CACert, snapMetaSvc.Spec.Address, saToken, snapNamespace)
	stream, err := c.client.GetDelta(ctx, &pgrpc.GetDeltaRequest{
		BaseSnapshotId:   snapNamespace + "/" + baseVolumeSnapshot,
		TargetSnapshotId: snapNamespace + "/" + targetVolumeSnapshot,
		StartingOffset:   0,
		MaxResults:       uint32(256),
	})
	if err != nil {
		return err
	}
	done := make(chan bool)
	fmt.Println("Resp received:")
	go func() {
		for {
			resp, err := stream.Recv()
			if err == io.EOF {
				done <- true //means stream is finished
				return
			}
			if err != nil {
				log.Fatalf("cannot receive %v", err)
			}
			respJson, _ := json.Marshal(resp)
			fmt.Println(string(respJson))
		}
	}()

	<-done //we will wait until all response is received

The example above doesn't really illustrate processing of the multiple entries in the returned block_metadata slice, as the response is simply dumped as a string in JSON format, but it is clear that instead of the fmt.Println a real client would process the block_metadata slice ("page") in the resp. The stream processing loop itself is exited only on a non-nil err value from stream.Recv(), where io.EOF is not really an error but the formal proper end-of-stream indicator.

The corresponding server (sidecar) side logic for this primarily consists of a loop doing this:

func (s *Server) GetDelta(req *pgrpc.GetDeltaRequest, cbtClientStream pgrpc.SnapshotMetadata_GetDeltaServer) error {
	... // call the SP service after assembling the parameters
	done := make(chan bool)
	go func() {
		for {
			... // receive resp from the SP service
			log.Print("Received response from csi driver, proxying to client")
			if err := cbtClientStream.Send(resp); err != nil {
				log.Printf(fmt.Sprintf("cannot send %v", err))
				return
			}
		}
	}()
	<-done //we will wait until all response is received
	return nil
}

(The prototype sidecar has very little error handling)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see. You're using an error to signal a normal condition. So you always take an extra trip through the loop, except when there are no records to return. My main concern was how we signal termination to the client, and the error semantics don't show up in the protobuf definitions so I overlooked it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it appears to be the idiomatic way in Go.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't hate this way of doing it, but even if it's idiomatic in Go, I'm not sure we use errors in this way anywhere else in CSI. I just want to highlight that a boolean flag in the return message would achieve the same effect as using an error to signal the end of iteration.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not both? Conventionally, client side should keep the connection alive until io.EOF, to avoid leaks. The end_of_list is a confirmation from the server to the client that everything has been sent. If there is an io.EOF but end_of_list is false, then the client knows it got an incomplete list, and will need to handle it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, it's a bit paranoid!

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking of a server-side graceful termination scenario, where the sidecar (or gRPC) had a chance to send an io.EOF before an unexpected termination, and not all the block metadata had been retrieved from the driver plugin. There is no way for the backup software to know the list it got is incomplete. Is this not a valid scenario?

}
// GetMetadataDeltaRequest solicits metadata on the data ranges that
// have changed between two snapshots.
message GetMetadataDeltaRequest {
  // The identifier of the snapshot against which changes are to be
  // computed.
  // This field is REQUIRED.
  string base_snapshot_id = 1;

  // The identifier of a second snapshot in the same volume, created
  // after the base snapshot.
  // This field is REQUIRED.
  string target_snapshot_id = 2;

  // The zero based starting byte position in the volume snapshot from
  // which the result should be computed. It is intended to be used to
  // continue a previously interrupted call.
  // When continuing an interrupted operation, the CO SHOULD set this
  // to the offset of the byte position immediately after the last
  // byte of the last data range received; otherwise the CO SHOULD set
  // it to zero.
  // The SP MUST ensure that the returned response stream does not
  // contain BlockMetadata tuples that end before the requested
  // starting_offset: i.e. if S is the requested starting_offset, and
  // B0 is block_metadata[0] of the first message in the response
  // stream, then (S < B0.byte_offset + B0.size_bytes) MUST be true.
  // This field is REQUIRED.
  int64 starting_offset = 3;

  // If non-zero, the maximum number of tuples to be returned in each
  // GetMetadataDeltaResponse message returned by the RPC stream.
  // If zero, the plugin will determine an appropriate value.
  // The plugin is always free to send fewer tuples than the requested
  // value.
  // This field is OPTIONAL.
  int32 max_results = 4;

  // Secrets required by plugin to complete the request.
  // This field is OPTIONAL. Refer to the `Secrets Requirements`
  // section on how to use this field.
  map<string, string> secrets = 5 [(csi_secret) = true];
}

// GetMetadataDeltaResponse messages are returned in a gRPC stream.
// Taken together, they report the data ranges that have changed
// between the base and target snapshots specified in the
// GetMetadataDeltaRequest message.
message GetMetadataDeltaResponse {
  // The style used in the BlockMetadata sequence.
  // This value MUST be the same in all such messages returned by the
  // stream.
  // If block_metadata_type is FIXED_LENGTH, then the size_bytes field
  // of each message in the block_metadata list MUST be constant.
  // This field is REQUIRED.
  BlockMetadataType block_metadata_type = 1;

  // The capacity of the underlying volume in bytes.
  // This value MUST be the same in all such messages returned by the
  // stream.
  // This field is REQUIRED.
  int64 volume_capacity_bytes = 2;

  // A list of data range tuples.
  // If the value of max_results in the GetMetadataDeltaRequest
  // message is greater than zero, then the number of entries in this
  // list MUST be less than or equal to that value.
  // The SP MUST respect the value of starting_offset in the request.
  // The byte_offset fields of adjacent BlockMetadata messages MUST be
  // strictly increasing and messages MUST NOT overlap: i.e. for any
  // two BlockMetadata messages, A and B, if A is returned before B,
  // then (A.byte_offset + A.size_bytes <= B.byte_offset) MUST be
  // true.
  // This MUST also be true if A and B are from block_metadata lists
  // in different GetMetadataDeltaResponse messages in the gRPC
  // stream.
  // This field is OPTIONAL.
  repeated BlockMetadata block_metadata = 3;
}
Loading
Loading