-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathendpoint.go
More file actions
561 lines (506 loc) · 21.1 KB
/
endpoint.go
File metadata and controls
561 lines (506 loc) · 21.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
package together
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"slices"
"time"
"github.com/togethercomputer/together-go/internal/apijson"
"github.com/togethercomputer/together-go/internal/apiquery"
"github.com/togethercomputer/together-go/internal/requestconfig"
"github.com/togethercomputer/together-go/option"
"github.com/togethercomputer/together-go/packages/param"
"github.com/togethercomputer/together-go/packages/respjson"
"github.com/togethercomputer/together-go/shared/constant"
)
// EndpointService contains methods and other services that help with interacting
// with the together API.
//
// Note, unlike clients, this service does not read variables from the environment
// automatically. You should not instantiate this service directly; use the
// [NewEndpointService] function instead.
type EndpointService struct {
// Options are the request options that every method of this service places
// ahead of the per-call options it receives.
Options []option.RequestOption
}
// NewEndpointService returns an EndpointService whose Options field is set to
// opts. These options are applied to each request after the parent client's
// options (if there is one), and before any request-specific options.
func NewEndpointService(opts ...option.RequestOption) (r EndpointService) {
	return EndpointService{Options: opts}
}
// New creates a new dedicated endpoint for serving models by sending body in a
// POST request to "endpoints". The endpoint will automatically start after
// creation. You can deploy any supported model on hardware configurations that
// meet the model's requirements.
//
// The service-level options are placed ahead of the per-call opts.
func (r *EndpointService) New(ctx context.Context, body EndpointNewParams, opts ...option.RequestOption) (res *DedicatedEndpoint, err error) {
	merged := slices.Concat(r.Options, opts)
	err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, "endpoints", body, &res, merged...)
	return res, err
}
// Get retrieves details about a specific endpoint, including its current state,
// configuration, and scaling settings, by sending a GET request to
// "endpoints/{endpointID}".
//
// An empty endpointID is rejected with an error before any request is issued.
// The service-level options are placed ahead of the per-call opts.
func (r *EndpointService) Get(ctx context.Context, endpointID string, opts ...option.RequestOption) (res *DedicatedEndpoint, err error) {
	if endpointID == "" {
		return nil, errors.New("missing required endpointId parameter")
	}
	opts = slices.Concat(r.Options, opts)
	// Escape the ID so that characters such as "/", "?", "#" or "%" stay inside
	// this single path segment instead of changing the request target.
	path := fmt.Sprintf("endpoints/%s", url.PathEscape(endpointID))
	err = requestconfig.ExecuteNewRequest(ctx, http.MethodGet, path, nil, &res, opts...)
	return res, err
}
// Update modifies an existing endpoint's configuration by sending body in a
// PATCH request to "endpoints/{endpointID}". You can modify the display name,
// autoscaling settings, or change the endpoint's state (start/stop).
//
// An empty endpointID is rejected with an error before any request is issued.
// The service-level options are placed ahead of the per-call opts.
func (r *EndpointService) Update(ctx context.Context, endpointID string, body EndpointUpdateParams, opts ...option.RequestOption) (res *DedicatedEndpoint, err error) {
	if endpointID == "" {
		return nil, errors.New("missing required endpointId parameter")
	}
	opts = slices.Concat(r.Options, opts)
	// Escape the ID so that characters such as "/", "?", "#" or "%" stay inside
	// this single path segment instead of changing the request target.
	path := fmt.Sprintf("endpoints/%s", url.PathEscape(endpointID))
	err = requestconfig.ExecuteNewRequest(ctx, http.MethodPatch, path, body, &res, opts...)
	return res, err
}
// List returns all endpoints associated with your account by sending a GET
// request to "endpoints" with query supplied as the request parameters. You can
// filter the results by type (dedicated or serverless).
//
// The service-level options are placed ahead of the per-call opts.
func (r *EndpointService) List(ctx context.Context, query EndpointListParams, opts ...option.RequestOption) (res *EndpointListResponse, err error) {
	merged := slices.Concat(r.Options, opts)
	err = requestconfig.ExecuteNewRequest(ctx, http.MethodGet, "endpoints", query, &res, merged...)
	return res, err
}
// Delete permanently deletes an endpoint by sending a DELETE request to
// "endpoints/{endpointID}". This action cannot be undone.
//
// An empty endpointID is rejected with an error before any request is issued.
// An `Accept: */*` header option is placed first, followed by the service-level
// options and then the per-call opts.
func (r *EndpointService) Delete(ctx context.Context, endpointID string, opts ...option.RequestOption) (err error) {
	if endpointID == "" {
		return errors.New("missing required endpointId parameter")
	}
	opts = slices.Concat(r.Options, opts)
	opts = append([]option.RequestOption{option.WithHeader("Accept", "*/*")}, opts...)
	// Escape the ID so that characters such as "/", "?", "#" or "%" stay inside
	// this single path segment instead of changing the request target.
	path := fmt.Sprintf("endpoints/%s", url.PathEscape(endpointID))
	return requestconfig.ExecuteNewRequest(ctx, http.MethodDelete, path, nil, nil, opts...)
}
// ListAvzones lists all available availability zones by sending a GET request
// to "clusters/availability-zones".
//
// The service-level options are placed ahead of the per-call opts.
func (r *EndpointService) ListAvzones(ctx context.Context, opts ...option.RequestOption) (res *EndpointListAvzonesResponse, err error) {
	merged := slices.Concat(r.Options, opts)
	err = requestconfig.ExecuteNewRequest(ctx, http.MethodGet, "clusters/availability-zones", nil, &res, merged...)
	return res, err
}
// ListHardware returns the available hardware configurations for deploying
// models by sending a GET request to "hardware" with query supplied as the
// request parameters. When a model parameter is provided, it returns only
// hardware configurations compatible with that model, including their current
// availability status.
//
// The service-level options are placed ahead of the per-call opts.
func (r *EndpointService) ListHardware(ctx context.Context, query EndpointListHardwareParams, opts ...option.RequestOption) (res *EndpointListHardwareResponse, err error) {
	merged := slices.Concat(r.Options, opts)
	err = requestconfig.ExecuteNewRequest(ctx, http.MethodGet, "hardware", query, &res, merged...)
	return res, err
}
// Autoscaling is the configuration for automatic scaling of replicas based on
// demand.
type Autoscaling struct {
// The maximum number of replicas to scale up to under load
MaxReplicas int64 `json:"max_replicas" api:"required"`
// The minimum number of replicas to maintain, even when there is no load
MinReplicas int64 `json:"min_replicas" api:"required"`
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
JSON struct {
MaxReplicas respjson.Field
MinReplicas respjson.Field
// NOTE(review): presumably holds payload fields with no matching struct
// field; confirm against the apijson decoder.
ExtraFields map[string]respjson.Field
// raw is the string returned by RawJSON.
raw string
} `json:"-"`
}
// RawJSON returns the unmodified JSON received from the API, as stored in the
// JSON metadata of r.
func (r Autoscaling) RawJSON() string {
	raw := r.JSON.raw
	return raw
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *Autoscaling) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// ToParam converts this Autoscaling to an AutoscalingParam by overriding the
// param value with the raw JSON of r.
//
// Warning: the fields of the param type will not be present. ToParam should only
// be used at the last possible moment before sending a request. Test for this with
// AutoscalingParam.Overrides()
func (r Autoscaling) ToParam() AutoscalingParam {
	raw := json.RawMessage(r.RawJSON())
	return param.Override[AutoscalingParam](raw)
}
// AutoscalingParam is the request-side configuration for automatic scaling of
// replicas based on demand.
//
// The properties MaxReplicas, MinReplicas are required.
type AutoscalingParam struct {
// The maximum number of replicas to scale up to under load
MaxReplicas int64 `json:"max_replicas" api:"required"`
// The minimum number of replicas to maintain, even when there is no load
MinReplicas int64 `json:"min_replicas" api:"required"`
paramObj
}
// MarshalJSON encodes r through param.MarshalObject. The local shadow type has
// the same fields as AutoscalingParam but none of its methods.
// NOTE(review): presumably this keeps the encoder from calling back into this
// method; confirm against param.MarshalObject.
func (r AutoscalingParam) MarshalJSON() (data []byte, err error) {
	type shadow AutoscalingParam
	fields := (*shadow)(&r)
	return param.MarshalObject(r, fields)
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *AutoscalingParam) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// DedicatedEndpoint holds details about a dedicated endpoint deployment. It is
// the result type of the New, Get and Update methods of [EndpointService].
type DedicatedEndpoint struct {
// Unique identifier for the endpoint
ID string `json:"id" api:"required"`
// Configuration for automatic scaling of the endpoint
Autoscaling Autoscaling `json:"autoscaling" api:"required"`
// Timestamp when the endpoint was created
CreatedAt time.Time `json:"created_at" api:"required" format:"date-time"`
// Human-readable name for the endpoint
DisplayName string `json:"display_name" api:"required"`
// The hardware configuration used for this endpoint
Hardware string `json:"hardware" api:"required"`
// The model deployed on this endpoint
Model string `json:"model" api:"required"`
// System name for the endpoint
Name string `json:"name" api:"required"`
// The object type, which is always `endpoint`.
Object constant.Endpoint `json:"object" default:"endpoint"`
// The owner of this endpoint
Owner string `json:"owner" api:"required"`
// Current state of the endpoint
//
// Any of "PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR".
State DedicatedEndpointState `json:"state" api:"required"`
// The type of endpoint
//
// Any of "dedicated".
Type DedicatedEndpointType `json:"type" api:"required"`
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
JSON struct {
ID respjson.Field
Autoscaling respjson.Field
CreatedAt respjson.Field
DisplayName respjson.Field
Hardware respjson.Field
Model respjson.Field
Name respjson.Field
Object respjson.Field
Owner respjson.Field
State respjson.Field
Type respjson.Field
ExtraFields map[string]respjson.Field
// raw is the string returned by RawJSON.
raw string
} `json:"-"`
}
// RawJSON returns the unmodified JSON received from the API, as stored in the
// JSON metadata of r.
func (r DedicatedEndpoint) RawJSON() string {
	raw := r.JSON.raw
	return raw
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *DedicatedEndpoint) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// DedicatedEndpointState is the current state of the endpoint, as carried by
// the State field of [DedicatedEndpoint].
type DedicatedEndpointState string
// Values of DedicatedEndpointState declared by this package.
const (
DedicatedEndpointStatePending DedicatedEndpointState = "PENDING"
DedicatedEndpointStateStarting DedicatedEndpointState = "STARTING"
DedicatedEndpointStateStarted DedicatedEndpointState = "STARTED"
DedicatedEndpointStateStopping DedicatedEndpointState = "STOPPING"
DedicatedEndpointStateStopped DedicatedEndpointState = "STOPPED"
DedicatedEndpointStateError DedicatedEndpointState = "ERROR"
)
// DedicatedEndpointType is the type of endpoint, as carried by the Type field
// of [DedicatedEndpoint].
type DedicatedEndpointType string
// Values of DedicatedEndpointType declared by this package.
const (
DedicatedEndpointTypeDedicated DedicatedEndpointType = "dedicated"
)
// EndpointListResponse is the result type of [EndpointService.List].
type EndpointListResponse struct {
// Data holds the listed endpoints.
Data []EndpointListResponseData `json:"data" api:"required"`
// The object type, which is always `list`.
Object constant.List `json:"object" default:"list"`
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
JSON struct {
Data respjson.Field
Object respjson.Field
ExtraFields map[string]respjson.Field
// raw is the string returned by RawJSON.
raw string
} `json:"-"`
}
// RawJSON returns the unmodified JSON received from the API, as stored in the
// JSON metadata of r.
func (r EndpointListResponse) RawJSON() string {
	raw := r.JSON.raw
	return raw
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *EndpointListResponse) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// EndpointListResponseData holds details about an endpoint when listed via the
// list endpoint. It is the element type of the Data field of
// [EndpointListResponse].
type EndpointListResponseData struct {
// Unique identifier for the endpoint
ID string `json:"id" api:"required"`
// Timestamp when the endpoint was created
CreatedAt time.Time `json:"created_at" api:"required" format:"date-time"`
// The model deployed on this endpoint
Model string `json:"model" api:"required"`
// System name for the endpoint
Name string `json:"name" api:"required"`
// The object type, which is always `endpoint`.
Object constant.Endpoint `json:"object" default:"endpoint"`
// The owner of this endpoint
Owner string `json:"owner" api:"required"`
// Current state of the endpoint
//
// Any of "PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR".
State string `json:"state" api:"required"`
// The type of endpoint
//
// Any of "serverless", "dedicated".
Type string `json:"type" api:"required"`
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
JSON struct {
ID respjson.Field
CreatedAt respjson.Field
Model respjson.Field
Name respjson.Field
Object respjson.Field
Owner respjson.Field
State respjson.Field
Type respjson.Field
ExtraFields map[string]respjson.Field
// raw is the string returned by RawJSON.
raw string
} `json:"-"`
}
// RawJSON returns the unmodified JSON received from the API, as stored in the
// JSON metadata of r.
func (r EndpointListResponseData) RawJSON() string {
	raw := r.JSON.raw
	return raw
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *EndpointListResponseData) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// EndpointListAvzonesResponse is a list of unique availability zones. It is the
// result type of [EndpointService.ListAvzones].
type EndpointListAvzonesResponse struct {
// Avzones holds the availability zone names.
Avzones []string `json:"avzones" api:"required"`
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
JSON struct {
Avzones respjson.Field
ExtraFields map[string]respjson.Field
// raw is the string returned by RawJSON.
raw string
} `json:"-"`
}
// RawJSON returns the unmodified JSON received from the API, as stored in the
// JSON metadata of r.
func (r EndpointListAvzonesResponse) RawJSON() string {
	raw := r.JSON.raw
	return raw
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *EndpointListAvzonesResponse) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// EndpointListHardwareResponse is the result type of
// [EndpointService.ListHardware].
type EndpointListHardwareResponse struct {
// Data holds the listed hardware configurations.
Data []EndpointListHardwareResponseData `json:"data" api:"required"`
// The object type, which is always `list`.
Object constant.List `json:"object" default:"list"`
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
JSON struct {
Data respjson.Field
Object respjson.Field
ExtraFields map[string]respjson.Field
// raw is the string returned by RawJSON.
raw string
} `json:"-"`
}
// RawJSON returns the unmodified JSON received from the API, as stored in the
// JSON metadata of r.
func (r EndpointListHardwareResponse) RawJSON() string {
	raw := r.JSON.raw
	return raw
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *EndpointListHardwareResponse) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// EndpointListHardwareResponseData holds hardware configuration details with
// optional availability status. It is the element type of the Data field of
// [EndpointListHardwareResponse].
type EndpointListHardwareResponseData struct {
// Unique identifier for the hardware configuration
ID string `json:"id" api:"required"`
// The object type, which is always `hardware`.
Object constant.Hardware `json:"object" default:"hardware"`
// Pricing details for using an endpoint
Pricing EndpointListHardwareResponseDataPricing `json:"pricing" api:"required"`
// Detailed specifications of a hardware configuration
Specs EndpointListHardwareResponseDataSpecs `json:"specs" api:"required"`
// Timestamp of when the hardware status was last updated
UpdatedAt time.Time `json:"updated_at" api:"required" format:"date-time"`
// Indicates the current availability status of a hardware configuration.
// Unlike the other fields, this one is not tagged api:"required".
Availability EndpointListHardwareResponseDataAvailability `json:"availability"`
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
JSON struct {
ID respjson.Field
Object respjson.Field
Pricing respjson.Field
Specs respjson.Field
UpdatedAt respjson.Field
Availability respjson.Field
ExtraFields map[string]respjson.Field
// raw is the string returned by RawJSON.
raw string
} `json:"-"`
}
// RawJSON returns the unmodified JSON received from the API, as stored in the
// JSON metadata of r.
func (r EndpointListHardwareResponseData) RawJSON() string {
	raw := r.JSON.raw
	return raw
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *EndpointListHardwareResponseData) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// EndpointListHardwareResponseDataPricing holds pricing details for using an
// endpoint. It is the type of the Pricing field of
// [EndpointListHardwareResponseData].
type EndpointListHardwareResponseDataPricing struct {
// Cost per minute of endpoint uptime in cents
CentsPerMinute float64 `json:"cents_per_minute" api:"required"`
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
JSON struct {
CentsPerMinute respjson.Field
ExtraFields map[string]respjson.Field
// raw is the string returned by RawJSON.
raw string
} `json:"-"`
}
// RawJSON returns the unmodified JSON received from the API, as stored in the
// JSON metadata of r.
func (r EndpointListHardwareResponseDataPricing) RawJSON() string {
	raw := r.JSON.raw
	return raw
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *EndpointListHardwareResponseDataPricing) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// EndpointListHardwareResponseDataSpecs holds the detailed specifications of a
// hardware configuration. It is the type of the Specs field of
// [EndpointListHardwareResponseData].
type EndpointListHardwareResponseDataSpecs struct {
// Number of GPUs in this configuration
GPUCount int64 `json:"gpu_count" api:"required"`
// The GPU interconnect technology
GPULink string `json:"gpu_link" api:"required"`
// Amount of GPU memory in GB
GPUMemory float64 `json:"gpu_memory" api:"required"`
// The type/model of GPU
GPUType string `json:"gpu_type" api:"required"`
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
JSON struct {
GPUCount respjson.Field
GPULink respjson.Field
GPUMemory respjson.Field
GPUType respjson.Field
ExtraFields map[string]respjson.Field
// raw is the string returned by RawJSON.
raw string
} `json:"-"`
}
// RawJSON returns the unmodified JSON received from the API, as stored in the
// JSON metadata of r.
func (r EndpointListHardwareResponseDataSpecs) RawJSON() string {
	raw := r.JSON.raw
	return raw
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *EndpointListHardwareResponseDataSpecs) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// EndpointListHardwareResponseDataAvailability indicates the current
// availability status of a hardware configuration. It is the type of the
// Availability field of [EndpointListHardwareResponseData].
type EndpointListHardwareResponseDataAvailability struct {
// The availability status of the hardware configuration
//
// Any of "available", "unavailable", "insufficient".
Status string `json:"status" api:"required"`
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
JSON struct {
Status respjson.Field
ExtraFields map[string]respjson.Field
// raw is the string returned by RawJSON.
raw string
} `json:"-"`
}
// RawJSON returns the unmodified JSON received from the API, as stored in the
// JSON metadata of r.
func (r EndpointListHardwareResponseDataAvailability) RawJSON() string {
	raw := r.JSON.raw
	return raw
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *EndpointListHardwareResponseDataAvailability) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// EndpointNewParams is the request body accepted by [EndpointService.New].
//
// The properties Autoscaling, Hardware, Model are required.
type EndpointNewParams struct {
// Configuration for automatic scaling of the endpoint
Autoscaling AutoscalingParam `json:"autoscaling,omitzero" api:"required"`
// The hardware configuration to use for this endpoint
Hardware string `json:"hardware" api:"required"`
// The model to deploy on this endpoint
Model string `json:"model" api:"required"`
// The number of minutes of inactivity after which the endpoint will be
// automatically stopped. Set to null, omit or set to 0 to disable automatic
// timeout.
InactiveTimeout param.Opt[int64] `json:"inactive_timeout,omitzero"`
// Create the endpoint in a specified availability zone (e.g., us-central-4b)
AvailabilityZone param.Opt[string] `json:"availability_zone,omitzero"`
// This parameter is deprecated and no longer has any effect.
//
// Deprecated: this parameter no longer has any effect.
DisablePromptCache param.Opt[bool] `json:"disable_prompt_cache,omitzero"`
// Whether to disable speculative decoding for this endpoint
DisableSpeculativeDecoding param.Opt[bool] `json:"disable_speculative_decoding,omitzero"`
// A human-readable name for the endpoint
DisplayName param.Opt[string] `json:"display_name,omitzero"`
// The desired state of the endpoint
//
// Any of "STARTED", "STOPPED".
State EndpointNewParamsState `json:"state,omitzero"`
paramObj
}
// MarshalJSON encodes r through param.MarshalObject. The local shadow type has
// the same fields as EndpointNewParams but none of its methods.
// NOTE(review): presumably this keeps the encoder from calling back into this
// method; confirm against param.MarshalObject.
func (r EndpointNewParams) MarshalJSON() (data []byte, err error) {
	type shadow EndpointNewParams
	fields := (*shadow)(&r)
	return param.MarshalObject(r, fields)
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *EndpointNewParams) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// EndpointNewParamsState is the desired state of the endpoint, as carried by
// the State field of [EndpointNewParams].
type EndpointNewParamsState string
// Values of EndpointNewParamsState declared by this package.
const (
EndpointNewParamsStateStarted EndpointNewParamsState = "STARTED"
EndpointNewParamsStateStopped EndpointNewParamsState = "STOPPED"
)
// EndpointUpdateParams is the request body accepted by
// [EndpointService.Update]. No field is tagged api:"required".
type EndpointUpdateParams struct {
// The number of minutes of inactivity after which the endpoint will be
// automatically stopped. Set to 0 to disable automatic timeout.
InactiveTimeout param.Opt[int64] `json:"inactive_timeout,omitzero"`
// A human-readable name for the endpoint
DisplayName param.Opt[string] `json:"display_name,omitzero"`
// New autoscaling configuration for the endpoint
Autoscaling AutoscalingParam `json:"autoscaling,omitzero"`
// The desired state of the endpoint
//
// Any of "STARTED", "STOPPED".
State EndpointUpdateParamsState `json:"state,omitzero"`
paramObj
}
// MarshalJSON encodes r through param.MarshalObject. The local shadow type has
// the same fields as EndpointUpdateParams but none of its methods.
// NOTE(review): presumably this keeps the encoder from calling back into this
// method; confirm against param.MarshalObject.
func (r EndpointUpdateParams) MarshalJSON() (data []byte, err error) {
	type shadow EndpointUpdateParams
	fields := (*shadow)(&r)
	return param.MarshalObject(r, fields)
}
// UnmarshalJSON decodes data into r by delegating to apijson.UnmarshalRoot and
// returns whatever error that call reports.
func (r *EndpointUpdateParams) UnmarshalJSON(data []byte) error {
	err := apijson.UnmarshalRoot(data, r)
	return err
}
// EndpointUpdateParamsState is the desired state of the endpoint, as carried by
// the State field of [EndpointUpdateParams].
type EndpointUpdateParamsState string
// Values of EndpointUpdateParamsState declared by this package.
const (
EndpointUpdateParamsStateStarted EndpointUpdateParamsState = "STARTED"
EndpointUpdateParamsStateStopped EndpointUpdateParamsState = "STOPPED"
)
// EndpointListParams holds the query parameters accepted by
// [EndpointService.List]. Every field carries a `query` tag and is tagged
// `json:"-"`; URLQuery turns the value into url.Values.
type EndpointListParams struct {
// If true, return only endpoints owned by the caller
Mine param.Opt[bool] `query:"mine,omitzero" json:"-"`
// Filter endpoints by type
//
// Any of "dedicated", "serverless".
Type EndpointListParamsType `query:"type,omitzero" json:"-"`
// Filter endpoints by usage type
//
// Any of "on-demand", "reserved".
UsageType EndpointListParamsUsageType `query:"usage_type,omitzero" json:"-"`
paramObj
}
// URLQuery serializes [EndpointListParams]'s query parameters as `url.Values`,
// using the ArrayQueryFormatComma and NestedQueryFormatBrackets settings.
func (r EndpointListParams) URLQuery() (v url.Values, err error) {
	settings := apiquery.QuerySettings{
		ArrayFormat:  apiquery.ArrayQueryFormatComma,
		NestedFormat: apiquery.NestedQueryFormatBrackets,
	}
	return apiquery.MarshalWithSettings(r, settings)
}
// EndpointListParamsType filters endpoints by type; it is the type of the Type
// field of [EndpointListParams].
type EndpointListParamsType string
// Values of EndpointListParamsType declared by this package.
const (
EndpointListParamsTypeDedicated EndpointListParamsType = "dedicated"
EndpointListParamsTypeServerless EndpointListParamsType = "serverless"
)
// EndpointListParamsUsageType filters endpoints by usage type; it is the type
// of the UsageType field of [EndpointListParams].
type EndpointListParamsUsageType string
// Values of EndpointListParamsUsageType declared by this package.
const (
EndpointListParamsUsageTypeOnDemand EndpointListParamsUsageType = "on-demand"
EndpointListParamsUsageTypeReserved EndpointListParamsUsageType = "reserved"
)
// EndpointListHardwareParams holds the query parameters accepted by
// [EndpointService.ListHardware]. URLQuery turns the value into url.Values.
type EndpointListHardwareParams struct {
// Filter hardware configurations by model compatibility. When provided, the
// response includes availability status for each compatible configuration.
// [See all of Together AI's dedicated models](https://docs.together.ai/docs/dedicated-models)
Model param.Opt[string] `query:"model,omitzero" json:"-"`
paramObj
}
// URLQuery serializes [EndpointListHardwareParams]'s query parameters as
// `url.Values`, using the ArrayQueryFormatComma and NestedQueryFormatBrackets
// settings.
func (r EndpointListHardwareParams) URLQuery() (v url.Values, err error) {
	settings := apiquery.QuerySettings{
		ArrayFormat:  apiquery.ArrayQueryFormatComma,
		NestedFormat: apiquery.NestedQueryFormatBrackets,
	}
	return apiquery.MarshalWithSettings(r, settings)
}