-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathdeidentify-file.ts
More file actions
148 lines (123 loc) · 5.01 KB
/
deidentify-file.ts
File metadata and controls
148 lines (123 loc) · 5.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import {
Credentials,
Env,
LogLevel,
Skyflow,
SkyflowConfig,
SkyflowError,
DeidentifyFileRequest,
DeidentifyFileOptions,
DetectEntities,
MaskingMethod,
DetectOutputTranscription,
TokenFormat,
TokenType,
Transformations,
Bleep,
VaultConfig,
DeidentifyFileResponse,
FileInput,
} from 'skyflow-node';
import fs from 'fs';
/**
 * Skyflow Deidentify File Example
 *
 * This sample demonstrates how to use all available options for de-identifying files.
 * Supported file types: images (jpg, png, etc.), pdf, audio (mp3, wav), documents,
 * spreadsheets, presentations, structured text.
 *
 * Flow: build credentials and vault configuration, create the Skyflow client,
 * read the input file from disk and wrap it in a `File`, configure
 * `DeidentifyFileOptions`, call `deidentifyFile`, and log the response.
 *
 * Errors are caught and logged to the console rather than rethrown.
 *
 * Note: File de-identification requires Node.js version 20 or above.
 *
 * @returns A promise that settles once the request has completed or its
 *          failure has been logged.
 */
async function performDeidentifyFile(): Promise<void> {
  try {
    // Step 1: Configure Credentials
    const credentials: Credentials = {
      path: 'path-to-credentials-json', // Path to credentials file
    };

    // Step 2: Configure Vault
    const primaryVaultConfig: VaultConfig = {
      vaultId: '<VAULT_ID>',
      clusterId: '<CLUSTER_ID>',
      env: Env.PROD,
      credentials: credentials,
    };

    // Step 3: Configure Skyflow Client
    const skyflowConfig: SkyflowConfig = {
      vaultConfigs: [primaryVaultConfig],
      logLevel: LogLevel.INFO, // Recommended to use LogLevel.ERROR in production environment.
    };

    // Initialize Skyflow Client
    const skyflowClient: Skyflow = new Skyflow(skyflowConfig);

    // Step 4: Prepare Deidentify File Request
    // Replace with your file object (e.g., from fs.readFileSync or browser File API)
    const filePath = '<FILE_PATH>'; // Replace with the path to your file
    const buffer = fs.readFileSync(filePath);
    const file = new File([buffer], filePath);

    // Pass either a file object or a file path, but not both.
    const fileInput: FileInput = {
      file,
    };
    const deidentifyFile = new DeidentifyFileRequest(fileInput);

    // Step 5: Configure DeidentifyFileOptions
    const options = new DeidentifyFileOptions();

    // Entities to detect and deidentify
    options.setEntities([DetectEntities.SSN, DetectEntities.CREDIT_CARD]);

    // Allowlist regex patterns (entities matching these will NOT be deidentified)
    options.setAllowRegexList(['<YOUR_REGEX_PATTERN>']);

    // Restrict de-identification to entities matching these regex patterns
    options.setRestrictRegexList(['<YOUR_REGEX_PATTERN>']);

    // Token format for deidentified entities
    const tokenFormat = new TokenFormat();
    tokenFormat.setDefault(TokenType.ENTITY_ONLY);
    options.setTokenFormat(tokenFormat);

    // Custom transformations for entities
    // const transformations = new Transformations(); // Transformations cannot be applied to Documents, Images, or PDFs file formats.
    // transformations.setShiftDays({
    //   max: 30,
    //   min: 10,
    //   entities: [DetectEntities.SSN],
    // });
    // options.setTransformations(transformations);

    // Output directory for saving the deidentified file
    options.setOutputDirectory('<OUTPUT_DIRECTORY_PATH>'); // Replace with your output directory

    // Wait time for response (max 64 seconds)
    options.setWaitTime(15);

    // --- Image Options (apply when file is an image) ---
    // options.setOutputProcessedImage(true); // Include processed image in output
    // options.setOutputOcrText(true); // Include OCR text in response
    // options.setMaskingMethod(MaskingMethod.Blackbox); // Masking method for image entities

    // --- PDF Options (apply when file is a PDF) ---
    // options.setPixelDensity(1.5); // Pixel density for PDF processing
    // options.setMaxResolution(2000); // Max resolution for PDF

    // --- Audio Options (apply when file is audio) ---
    // options.setOutputProcessedAudio(true); // Include processed audio in output
    // options.setOutputTranscription(DetectOutputTranscription.PLAINTEXT_TRANSCRIPTION); // Type of transcription

    // Bleep audio configuration
    // const bleep = new Bleep();
    // bleep.setGain(5); // Loudness in dB
    // bleep.setFrequency(1000); // Pitch in Hz
    // bleep.setStartPadding(0.1); // Padding at start in seconds
    // bleep.setStopPadding(0.2); // Padding at end in seconds
    // options.setBleep(bleep);

    // Step 6: Call deidentifyFile API
    const response: DeidentifyFileResponse = await skyflowClient
      .detect(primaryVaultConfig.vaultId)
      .deidentifyFile(deidentifyFile, options);

    // Handle Successful Response
    console.log('Deidentify File Response:', response);
    console.log('Deidentified File:', response.file);
    console.log('Deidentified File base64:', response.fileBase64);
  } catch (error: unknown) {
    // Comprehensive Error Handling
    if (error instanceof SkyflowError) {
      console.error('Skyflow Specific Error:', {
        code: error.error?.http_code,
        message: error.message,
        details: error.error?.details,
      });
    } else {
      // Log the value itself: JSON.stringify on an Error yields "{}" because
      // `message` and `stack` are non-enumerable, which would hide failures
      // such as a missing input file.
      console.error('Unexpected Error:', error);
    }
  }
}
// Run the sample. performDeidentifyFile catches and logs its own failures,
// so the returned promise is deliberately not awaited here.
void performDeidentifyFile();