-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathdeidentify-file-with-filepath-async.ts
More file actions
145 lines (122 loc) · 4.94 KB
/
deidentify-file-with-filepath-async.ts
File metadata and controls
145 lines (122 loc) · 4.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import {
Credentials,
Env,
LogLevel,
Skyflow,
SkyflowConfig,
SkyflowError,
DeidentifyFileRequest,
DeidentifyFileOptions,
DetectEntities,
MaskingMethod,
DetectOutputTranscription,
TokenFormat,
TokenType,
Transformations,
Bleep,
VaultConfig,
DeidentifyFileResponse,
FileInput,
} from 'skyflow-node';
/**
 * Skyflow Deidentify File Example (file path input)
 *
 * This sample demonstrates how to use the available options for deidentifying a file
 * that is referenced by its path.
 * Supported file types: images (jpg, png, etc.), pdf, audio (mp3, wav), documents,
 * spreadsheets, presentations, structured text.
 *
 * Note: File deidentification requires Node.js version 20 or above.
 *
 * The whole flow (client construction, request/option setup and the API call) runs
 * inside a single try/catch, so errors thrown while building the client or the request
 * are reported the same way as errors returned by the API call.
 *
 * @returns A promise that settles after the deidentify call has finished and its
 *          outcome has been logged. Failures are logged, not rethrown.
 */
async function performDeidentifyFileAsync(): Promise<void> {
  try {
    // Step 1: Configure Credentials
    const credentials: Credentials = {
      path: 'path-to-credentials-json', // Replace with the path to your credentials JSON file
    };

    // Step 2: Configure Vault
    const primaryVaultConfig: VaultConfig = {
      vaultId: '<VAULT_ID>',
      clusterId: '<CLUSTER_ID>',
      env: Env.PROD,
      credentials: credentials,
    };

    // Step 3: Configure Skyflow Client
    const skyflowConfig: SkyflowConfig = {
      vaultConfigs: [primaryVaultConfig],
      logLevel: LogLevel.INFO, // Recommended to use LogLevel.ERROR in production environment.
    };

    // Initialize Skyflow Client
    const skyflowClient: Skyflow = new Skyflow(skyflowConfig);

    // Step 4: Prepare Deidentify File Request
    const filePath = '<FILE_PATH>'; // Replace with the path to your file

    // Pass either file object or file path, but not both. This sample uses the file path.
    const fileInput: FileInput = {
      filePath,
    };
    const deidentifyFile = new DeidentifyFileRequest(fileInput);

    // Step 5: Configure DeidentifyFileOptions
    const options = new DeidentifyFileOptions();

    // Entities to detect and deidentify
    options.setEntities([DetectEntities.SSN, DetectEntities.CREDIT_CARD]);

    // Allowlist regex patterns (entities matching these will NOT be deidentified)
    options.setAllowRegexList(['<YOUR_REGEX_PATTERN>']);

    // Restrict deidentification to entities matching these regex patterns
    options.setRestrictRegexList(['<YOUR_REGEX_PATTERN>']);

    // Token format for deidentified entities
    const tokenFormat = new TokenFormat();
    tokenFormat.setDefault(TokenType.ENTITY_ONLY);
    options.setTokenFormat(tokenFormat);

    // Custom transformations for entities
    // const transformations = new Transformations(); // Transformations cannot be applied to Documents, Images, or PDFs file formats.
    // transformations.setShiftDays({
    //   max: 30,
    //   min: 10,
    //   entities: [DetectEntities.SSN],
    // });
    // options.setTransformations(transformations);

    // Output directory for saving the deidentified file
    // Providing an output directory is not supported in Cloudflare Workers
    options.setOutputDirectory('<OUTPUT_DIRECTORY_PATH>'); // Replace with your output directory

    // Wait time for response (max 64 seconds)
    options.setWaitTime(15);

    // --- Image Options (apply when file is an image) ---
    // options.setOutputProcessedImage(true); // Include processed image in output
    // options.setOutputOcrText(true); // Include OCR text in response
    // options.setMaskingMethod(MaskingMethod.Blackbox); // Masking method for image entities

    // --- PDF Options (apply when file is a PDF) ---
    // options.setPixelDensity(1.5); // Pixel density for PDF processing
    // options.setMaxResolution(2000); // Max resolution for PDF

    // --- Audio Options (apply when file is audio) ---
    // options.setOutputProcessedAudio(true); // Include processed audio in output
    // options.setOutputTranscription(DetectOutputTranscription.PLAINTEXT_TRANSCRIPTION); // Type of transcription

    // Bleep audio configuration
    // const bleep = new Bleep();
    // bleep.setGain(5); // Loudness in dB
    // bleep.setFrequency(1000); // Pitch in Hz
    // bleep.setStartPadding(0.1); // Padding at start in seconds
    // bleep.setStopPadding(0.2); // Padding at end in seconds
    // options.setBleep(bleep);

    // Step 6: Call deidentifyFile API
    // Awaiting here keeps the returned promise pending until the call has completed,
    // so callers that await this function observe the finished operation.
    const response: DeidentifyFileResponse = await skyflowClient
      .detect(primaryVaultConfig.vaultId)
      .deidentifyFile(deidentifyFile, options);

    // Handle Successful Response
    console.log('Deidentify File Response:', response);
    console.log('Deidentified File:', response.file);
    console.log('Deidentified File base64:', response.fileBase64);
  } catch (error: unknown) {
    // Comprehensive Error Handling
    if (error instanceof SkyflowError) {
      console.error('Skyflow Specific Error:', {
        code: error.error?.http_code,
        message: error.message,
        details: error.error?.details,
      });
    } else {
      // JSON.stringify on an Error instance yields "{}" because message/stack are
      // non-enumerable, so Error objects are logged directly instead.
      console.error(
        'Unexpected Error:',
        error instanceof Error ? error : JSON.stringify(error),
      );
    }
  }
}
// Invoke the deidentify file function.
// The returned promise is handled explicitly so that a rejection is logged
// instead of surfacing as an unhandled promise rejection.
performDeidentifyFileAsync().catch((error: unknown) => {
  console.error('Unexpected Error:', error);
});