@@ -39,6 +39,24 @@ namespace vineyard {
3939// forward declaration
4040struct LLMKV ;
4141
42+ // A KVCacheChunk contains all the KV tensors of a fixed number of
43+ // tokens (i.e., `chunk_size`).
44+ //
45+ // In its object blob, we first store all the KV tensors, and then
46+ // store all the tokens (including prefix tokens and current tokens
47+ // cached in the chunk), which will be used to avoid hash conflicts.
48+ //
49+ // In its metadata, we store the namespace (i.e., `ns_`), which will
50+ // be used as the name prefix of each chunk. Clients can also use the
51+ // namespace to list all the chunks. Access time (i.e., `access_time_`)
52+ // in its metadata is used for the TTL-based global GC. We also have
53+ // the md5sum of all tokens (including prefix tokens and current tokens)
54+ // in its metadata. When we reconstruct a chunk from the object blob
55+ // and metadata, we calculate the md5sum of all tokens in the blob and
56+ // compare it with the md5sum in the metadata. If they are the same,
57+ // we consider the chunk valid. Otherwise, we consider the chunk
58+ // corrupted. So far, we don't compute the md5sum of the tensors, to
59+ // avoid the compute overhead.
4260class KVCacheChunk : public vineyard ::Registered<KVCacheChunk> {
4361 public:
4462 inline static constexpr char kFieldNameNS [] = " namespace" ;
@@ -52,12 +70,17 @@ class KVCacheChunk : public vineyard::Registered<KVCacheChunk> {
5270
5371 private:
5472 std::shared_ptr<Buffer> buffer_;
73+ // number of prefix tokens and current tokens in the chunk
5574 int total_tokens_;
5675 int tensor_nbytes_;
5776 int layer_;
5877 int chunk_size_;
78+ // access time is used for TTL-based global GC
5979 uint64_t access_time_;
80+ // md5sum of all tokens (including prefix tokens and current tokens)
6081 std::string md5_;
82+ // namespace. chunks within the same namespace will be shared
83+ // among different clients
6184 std::string ns_;
6285
6386 public:
@@ -79,23 +102,53 @@ class KVCacheChunk : public vineyard::Registered<KVCacheChunk> {
79102 friend class KVCacheChunkBuilder ;
80103};
81104
105+ // A KVCacheChunkBuilder is used to build a KVCacheChunk.
106+ //
107+ // We have two kinds of builders:
108+ // 1. The builder to build a new chunk.
109+ // 2. The builder to rebuild a chunk from the object blob and metadata.
110+ //
111+ // For the first kind of builder, `Make` creates an empty chunk and
112+ // `Update` fills the chunk with KV tensors. After `Update`, the chunk
113+ // is marked as ready and waiting readers will be notified. This kind
114+ // of builder can be sealed to a KVCacheChunk.
115+ //
116+ // For the second kind of builder, `Make` only assigns the chunk id and
117+ // the first `Query` will trigger a construction of the chunk, i.e.,
118+ // constructing the corresponding chunk with fetched metadata and blob.
119+ // After construction, the chunk is marked as ready and other waiting
120+ // readers will be notified. This kind of builder will never be sealed
121+ // since the chunk already exists in the object store.
122+ //
123+ // We also track the access time of the chunk in the builder. Global
124+ // access time is the latest access time of the global object we know.
125+ // Access time is the local access time that is updated by each access.
126+ // The local access time will finally be updated to the global access
127+ // time based on the policy used in AIBrixBlobStorage.
82128class KVCacheChunkBuilder {
83129 private:
84130 RPCClient& rpc_client_;
85131 std::vector<int > all_tokens_;
86132 std::shared_ptr<RemoteBlobWriter> remote_buffer_writer_ = nullptr ;
87133 ObjectID chunk_id_;
88134 std::shared_ptr<Buffer> buffer_ = nullptr ;
135+
89136 int total_tokens_;
90137 int tensor_nbytes_;
91138 int layer_;
92139 int chunk_size_;
93140 std::string ns_;
141+
142+ // `time_mu_` protects the access times of the chunk.
94143 std::shared_mutex time_mu_;
95144 uint64_t g_access_time_ = 0 ;
96145 uint64_t access_time_ = 0 ;
146+
147+ // `mutex_` and `cv_` are used to block readers until the chunk
148+ // is ready to be read.
97149 std::mutex mutex_;
98150 std::condition_variable cv_;
151+
99152 std::atomic<bool > is_ready_ = false ;
100153 std::string md5_;
101154
@@ -140,6 +193,7 @@ class KVCacheChunkBuilder {
140193 return access_time_;
141194 }
142195
196+ // Whether the chunk is ready to be read.
143197 bool IsReady () { return is_ready_; }
144198
145199 std::shared_ptr<Object> Seal ();
@@ -150,6 +204,7 @@ class KVCacheChunkBuilder {
150204
151205 void PrintKVCacheChunk ();
152206
207+ // Whether the chunk is the same as the chunk with the given metadata.
153208 Status IsSame (const ObjectMeta& meta);
154209
155210 KVCacheChunkBuilder (RPCClient& rpc_client, int tensor_nbytes, int layer,
0 commit comments