-
Notifications
You must be signed in to change notification settings - Fork 110
Expand file tree
/
Copy pathdequantize_gptq.h
More file actions
30 lines (23 loc) · 1.79 KB
/
dequantize_gptq.h
File metadata and controls
30 lines (23 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#ifndef __INFINIOP_DEQUANTIZE_GPTQ_API_H__
#define __INFINIOP_DEQUANTIZE_GPTQ_API_H__
#include "../operator_descriptor.h"
/**
 * Handle type used by the DequantizeGPTQ operator functions declared in this
 * header.
 *
 * It is an alias for a pointer to struct InfiniopDescriptor; that struct is
 * not defined in this header (presumably it comes from
 * ../operator_descriptor.h — TODO confirm).
 */
typedef struct InfiniopDescriptor *infiniopDequantizeGPTQDescriptor_t;
/**
 * Creates a descriptor for the DequantizeGPTQ operator.
 *
 * NOTE(review): parameter semantics below are inferred from names and types
 * only; the implementation is not visible from this header — confirm against it.
 *
 * @param handle        Library handle the descriptor is created with.
 * @param desc_ptr      Out-parameter; presumably receives the newly created
 *                      descriptor on success.
 * @param out_desc      Tensor descriptor for the output (presumably the
 *                      dequantized weights).
 * @param qweight_desc  Tensor descriptor for the quantized weights.
 * @param scales_desc   Tensor descriptor for the scales.
 * @param zeros_desc    Tensor descriptor for the zeros (presumably the
 *                      quantization zero points).
 * @param g_idx_desc    Tensor descriptor for g_idx (presumably the GPTQ
 *                      group-index tensor — TODO confirm).
 * @return A status code of type infiniStatus_t.
 */
__C __export infiniStatus_t infiniopCreateDequantizeGPTQDescriptor(infiniopHandle_t handle,
infiniopDequantizeGPTQDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
infiniopTensorDescriptor_t qweight_desc,
infiniopTensorDescriptor_t scales_desc,
infiniopTensorDescriptor_t zeros_desc,
infiniopTensorDescriptor_t g_idx_desc); // presumably the GPTQ group-index tensor descriptor — TODO confirm
/**
 * Queries the workspace size associated with a DequantizeGPTQ descriptor.
 *
 * @param desc  Descriptor to query.
 * @param size  Out-parameter; presumably receives the required workspace size
 *              (units assumed to be bytes — TODO confirm).
 * @return A status code of type infiniStatus_t.
 */
__C __export infiniStatus_t infiniopGetDequantizeGPTQWorkspaceSize(infiniopDequantizeGPTQDescriptor_t desc, size_t *size);
/**
 * Runs the DequantizeGPTQ operator described by desc.
 *
 * The qweight, scales, zeros and g_idx buffers are const-qualified (inputs);
 * out is the only non-const data buffer (output).
 *
 * NOTE(review): buffer layouts, element types and memory spaces are not
 * visible from this header — confirm against the implementation.
 *
 * @param desc            Descriptor for this operation.
 * @param workspace       Scratch buffer supplied by the caller.
 * @param workspace_size  Size of workspace (presumably in bytes, and at least
 *                        the value reported by the workspace-size query —
 *                        TODO confirm).
 * @param out             Output buffer (presumably the dequantized weights).
 * @param qweight         Quantized weight data.
 * @param scales          Scale data.
 * @param zeros           Zeros data (presumably quantization zero points).
 * @param g_idx           g_idx data (presumably the GPTQ group index per
 *                        input channel — TODO confirm).
 * @param stream          Untyped stream pointer (presumably a backend-specific
 *                        execution stream/queue — TODO confirm).
 * @return A status code of type infiniStatus_t.
 */
__C __export infiniStatus_t infiniopDequantizeGPTQ(infiniopDequantizeGPTQDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *out,
const void *qweight,
const void *scales,
const void *zeros,
const void *g_idx, // presumably the GPTQ group-index data — TODO confirm
void *stream);
/**
 * Destroys a DequantizeGPTQ descriptor.
 *
 * @param desc  Descriptor to destroy (presumably must not be used after this
 *              call — TODO confirm).
 * @return A status code of type infiniStatus_t.
 */
__C __export infiniStatus_t infiniopDestroyDequantizeGPTQDescriptor(infiniopDequantizeGPTQDescriptor_t desc);
#endif