-
-
Notifications
You must be signed in to change notification settings - Fork 193
Expand file tree
/
Copy pathtranslateStrings.js
More file actions
345 lines (319 loc) · 13.5 KB
/
translateStrings.js
File metadata and controls
345 lines (319 loc) · 13.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
/*
* GNU AGPL-3.0 License
*
* Copyright (c) 2022 - present core.ai . All rights reserved.
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
* for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://opensource.org/licenses/AGPL-3.0.
*
*/
/* eslint-env node */
const fs = require('fs');
// API key for the translation service, read once from the environment when this module loads.
const { CORE_AI_TRANSLATE_API_KEY } = process.env;

// Running totals for the whole run; every counter starts at zero.
const globalUtilizationMetrics = {
    tokens: { prompt: 0, candidates: 0, cachedContent: 0, total: 0 },
    characters: { input: 0, output: 0 },
    // The currency is fixed here once; it is assumed to be the same for every response.
    costs: { input: 0, output: 0, total: 0, currency: "USD" }
};
/**
 * Adds the counters found in `obj.utilizationMetrics` onto the module-wide running totals.
 * Sections (`tokens`, `characters`, `costs`) that are absent are skipped, and a missing or falsy
 * counter contributes 0. The `currency` field of the totals is never touched.
 * @param {object} obj - An object carrying a `utilizationMetrics` field (tokens, characters, costs).
 * @returns {object} The (mutated) global utilization metrics accumulator.
 */
function aggregateUtilizationMetrics(obj) {
    const metrics = obj && obj.utilizationMetrics;
    if (!metrics) {
        console.warn("Object missing 'utilizationMetrics' field, nothing to aggregate.");
        return globalUtilizationMetrics;
    }
    // Counters to accumulate, grouped by section of the metrics object.
    const countersBySection = {
        tokens: ["prompt", "candidates", "cachedContent", "total"],
        characters: ["input", "output"],
        costs: ["input", "output", "total"]
    };
    for (const [section, counters] of Object.entries(countersBySection)) {
        const incoming = metrics[section];
        if (!incoming) {
            continue;
        }
        const runningTotals = globalUtilizationMetrics[section];
        for (const counter of counters) {
            runningTotals[counter] += incoming[counter] || 0;
        }
    }
    return globalUtilizationMetrics;
}
/**
 * Builds the request payload for the translation service.
 * @param {object} stringsToTranslate - The strings to translate, passed through as `source`.
 * @param {string} lang - The single target language for this request.
 * @returns {object} The payload object to send to the translation service.
 */
function getTranslationrequest(stringsToTranslate, lang) {
    const translationContext = "This is a bunch of strings extracted from a JavaScript file used to develop our product with is a text editor. Some strings may have HTML or templates(mustache library used). Please translate these strings accurately.";
    // Currently unused. Per-key context can be supplied here to give the AI additional information
    // about a specific key in the source.
    const sourceContext = {};
    // Several languages could be requested at once; for now exactly one is sent.
    const translationTargets = [lang];
    return {
        translationContext,
        source: stringsToTranslate,
        provider: "vertex",
        sourceContext,
        translationTargets
    };
}
/**
 * POSTs the translation payload as JSON to the translation endpoint and returns the parsed reply.
 * Any failure (network error, non-OK status, unparsable body) is logged and then rethrown.
 *
 * @param {object} apiInput - The translation payload object.
 * @returns {Promise<any>} The JSON-parsed response from the API.
 * @throws {Error} When the response status is not OK, or when the request or parsing fails.
 */
async function getTranslation(apiInput) {
    const endpoint = "https://translate.core.ai/translate";
    try {
        // Built inside the try so that a serialization failure is logged like any other error.
        const requestOptions = {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "authorization": `Basic ${CORE_AI_TRANSLATE_API_KEY}`
            },
            body: JSON.stringify(apiInput)
        };
        const reply = await fetch(endpoint, requestOptions);
        if (reply.ok) {
            return await reply.json();
        }
        throw new Error(`Request failed with status ${reply.status}`);
    } catch (error) {
        console.error("Error translating:", error);
        throw error;
    }
}
/**
 * Lists the entries of `src/nls` (relative to the current working directory) that are directories.
 * @returns {string[]} Directory names, in the order `readdirSync` reports them.
 */
function _getAllNLSFolders() {
    const nlsRoot = 'src/nls';
    return fs.readdirSync(nlsRoot)
        .filter((entry) => fs.statSync(`${nlsRoot}/${entry}`).isDirectory());
}
// Holds the most recent object that was passed to the global `define` hook installed below.
let requireDefinedStrings;
// Install a global `define` so that a module which calls `define(obj)` while it is being `require`d
// hands its object to this script. The hook only remembers the last object it was given.
global.define = function (jsonObj) {
requireDefinedStrings = jsonObj;
};
// Load the root (English) strings module; presumably it calls `define(...)` with its string table,
// which is how the table ends up in `requireDefinedStrings` - TODO confirm against root/strings.js.
require("../src/nls/root/strings");
// Snapshot of whatever the hook captured while the root strings module loaded.
let rootStrings = requireDefinedStrings;
/**
 * Reads a UTF-8 file and parses it as JSON. This is best effort: if the file cannot be read or
 * parsed, a message is logged and an empty object is returned instead.
 * @param {string} filePath - Path of the JSON file to load.
 * @returns {any} The parsed JSON value, or `{}` on any failure.
 */
function _getJson(filePath) {
    let parsed;
    try {
        const rawText = fs.readFileSync(filePath, 'utf8');
        parsed = JSON.parse(rawText);
    } catch (e) {
        console.log(`error reading ${filePath}, defaulting to {}`);
        parsed = {};
    }
    return parsed;
}
// Text written before the JSON payload of every generated `strings.js`: the license banner followed
// by the opening of the `define(` call. Lines are joined with "\n" and there is no trailing newline.
const FILE_HEADER = [
    "/*",
    "* GNU AGPL-3.0 License",
    "*",
    "* Copyright (c) 2021 - present core.ai . All rights reserved.",
    "*",
    "* This program is free software: you can redistribute it and/or modify it",
    "* under the terms of the GNU Affero General Public License as published by",
    "* the Free Software Foundation, either version 3 of the License, or",
    "* (at your option) any later version.",
    "*",
    "* This program is distributed in the hope that it will be useful, but WITHOUT",
    "* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or",
    "* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License",
    "* for more details.",
    "*",
    "* You should have received a copy of the GNU Affero General Public License",
    "* along with this program. If not, see https://opensource.org/licenses/AGPL-3.0.",
    "*",
    "*/",
    "define("
].join("\n");
// Text written after the JSON payload; closes the `define(` call opened by FILE_HEADER.
const FILE_FOOTER = ');';
/**
 * Decides whether a root string key should be translated. A key is excluded when it is itself a
 * `..._DO_NOT_TRANSLATE` directive, or when the root strings contain `<key>_DO_NOT_TRANSLATE`
 * set to the string 'true'.
 * @param {string} key - A key of the root strings table.
 * @returns {boolean} `true` when the key should be translated.
 */
function _isTranslatableKey(key) {
    const optOutSuffix = '_DO_NOT_TRANSLATE';
    const isDirectiveKey = key.endsWith(optOutSuffix);
    return !isDirectiveKey && rootStrings[key + optOutSuffix] !== 'true';
}
/**
 * Translates the given strings into `lang` through the translation service and returns the
 * translations for that language.
 *
 * The service response is fed to the utilization metrics accumulator and logged. Problems are
 * logged and written to `src/nls/errors.txt`; note that the file is overwritten on every write, so
 * it only holds the most recent error. A response with reported failures is still used, because
 * partial translations are acceptable.
 *
 * A response that lacks the `failedLanguages` or `translations` fields is treated as "no failures
 * reported" / "no translations returned" instead of crashing with a TypeError.
 *
 * @param {object} stringsToTranslate - Map of key to english string. An empty map skips the request.
 * @param {string} lang - Target language. If the response has no entry for it, the same name with
 *      `-` replaced by `_` is tried (for example `pt-br` and `pt_br`).
 * @returns {Promise<object>} Map of key to translated string, or `{}` when nothing was requested or
 *      the response has no entry for the language.
 * @throws {Error} Whatever the underlying request throws.
 */
async function coreAiTranslate(stringsToTranslate, lang) {
    if(!Object.keys(stringsToTranslate).length){
        return {};
    }
    const translationRequest = getTranslationrequest(stringsToTranslate, lang);
    const translations = await getTranslation(translationRequest);
    aggregateUtilizationMetrics(translations);
    console.log("Translation output: ", JSON.stringify(translations, null, 4));
    console.log("Aggregate utilization metrics: ", JSON.stringify(globalUtilizationMetrics, null, 4));
    // Tolerate a malformed response: missing fields behave like empty ones.
    const failedLanguages = translations?.failedLanguages ?? [];
    const translationsByLang = translations?.translations ?? {};
    if(failedLanguages.length){
        const errorStr = `Error translating ${lang}. it has failures `;
        console.error(errorStr);
        fs.writeFileSync(`src/nls/errors.txt`, errorStr);
        // this is ok to continue in case of partial translations.
    }
    let resolvedLang = lang;
    let translationForLanguage = translationsByLang[resolvedLang];
    if(!translationForLanguage) {
        // pt_br and pt-br are the same; the response may use the underscore spelling.
        resolvedLang = lang.replaceAll("-", "_");
        translationForLanguage = translationsByLang[resolvedLang];
    }
    if(!translationForLanguage){
        const errorStr = `Error translating. AI response doesnt have the language ${resolvedLang} translated!`;
        console.error(errorStr);
        fs.writeFileSync(`src/nls/errors.txt`, errorStr);
        return {};
    }
    return translationForLanguage;
}
/**
 * Shallow equality of two plain objects: both must have the same own enumerable keys, and the
 * value under each key must be strictly equal (`===`). Key order is irrelevant.
 *
 * A key has to actually exist on both sides. Without that check a key holding `undefined` in one
 * object would match a key that is absent from the other (`{a: undefined}` vs `{b: 1}`).
 *
 * @param {object} obj1 - First object.
 * @param {object} obj2 - Second object.
 * @returns {boolean} `true` when the objects are shallowly equal.
 */
function shallowEqual(obj1, obj2) {
    const keys1 = Object.keys(obj1);
    if (keys1.length !== Object.keys(obj2).length) {
        return false;
    }
    // Same key count, so it is enough to verify that every key of obj1 is owned by obj2 with the same value.
    return keys1.every((key) => Object.hasOwn(obj2, key) && obj1[key] === obj2[key]);
}
/**
* Auto translations scans the following files to determine which strings have changed and needs to be translated:
* 1. nls/<lang>/lastTranslated.json holds the last root english strings that was automatically translated. This will be
* used to compare with the current `root/strings.js`. We can determine which strings have changed from the last locale
* translation done and translate only those changed strings.
* 2. `expertTranslations.json` is a dictionary from english string to locale string that can be used to provide manual
* expert translations. When translating, we will check for an available translation in the expert translation json
* before calling google/aws translate. This file is also auto updated when someone provides a translation override
* in a specific locale.
*
* ## How we translate
* First we deduce if there are any manual translations done in `<locale>/strings.js` as users can explicitly provide
* translations like these: https://github.com/phcode-dev/phoenix/pull/588 .
* Then, we figure out the changed strings that needs translation by comparing `root/strings.js` with
* `<lang>/lastTranslated.json`. Then we translate with aws/google translate.
*
* Finally, we update all the autogenerated translations to disk.
*
* @param lang
* @return {Promise<void>}
* @private
*/
async function _processLang(lang) {
if(lang === 'root'){
return;
}
const expertTranslations = _getJson(`src/nls/${lang}/expertTranslations.json`, 'utf8');
let lastTranslated = _getJson(`src/nls/${lang}/lastTranslated.json`, 'utf8');
require(`../src/nls/${lang}/strings`);
let localeStringsJS = requireDefinedStrings;
let translations = {}, updatedLastTranslatedJSON={}, pendingTranslate = {};
for(let rootKey of Object.keys(rootStrings)){
if(!_isTranslatableKey(rootKey)){
continue; // move on to next string
}
let englishStringToTranslate = rootStrings[rootKey];
let lastTranslatedEnglishString = lastTranslated[rootKey];
if(englishStringToTranslate === lastTranslatedEnglishString){
// we have already translated this in the last pass.
// Load expert translation if there is one else we don't need to translate, use existing translation as is.
translations[rootKey] = expertTranslations[englishStringToTranslate] || localeStringsJS[rootKey];
if(translations[rootKey]){
updatedLastTranslatedJSON[rootKey] = englishStringToTranslate;
} else {
// we dont have a last local translation in locale strings.js file to use. this cannot happen
// except in a translation reset pass where we delete all translations and restart like when we moved
// to core.ai auto translate.
pendingTranslate[rootKey] = englishStringToTranslate;
}
} else {
// this is a new english string or there is a string change.
if(expertTranslations[englishStringToTranslate]){
// prefer expert translations over machine translations
translations[rootKey] = expertTranslations[englishStringToTranslate];
updatedLastTranslatedJSON[rootKey] = englishStringToTranslate;
} else {
pendingTranslate[rootKey] = englishStringToTranslate;
}
}
}
//let translatedText = await _translateString(englishStringToTranslate, lang);
console.log(`Translating ${Object.keys(pendingTranslate).length} strings to`, lang);
const aiTranslations = await coreAiTranslate(pendingTranslate, lang);
const allRootKeys = new Set(Object.keys(rootStrings));
for(let rootKey of Object.keys(pendingTranslate)){
if(!allRootKeys.has(rootKey)){
// AI hallucinated a root key?
const errorStr = `AI translated for a root key that doesnt exist!!! in ${lang}: ${rootKey} \nTranslation: ${aiTranslations[rootKey]}`;
console.error(errorStr);
fs.writeFileSync(`src/nls/errors.txt`, errorStr);
continue;
}
let englishStringToTranslate = rootStrings[rootKey];
const translatedText = aiTranslations[rootKey];
if(translatedText){
translations[rootKey] = translatedText;
updatedLastTranslatedJSON[rootKey] = englishStringToTranslate;
}
}
// now detect any keys that has not yet been translated
const allKeys = Object.keys(rootStrings).filter(_isTranslatableKey);
const translatedKeys = Object.keys(translations);
const notTranslated = allKeys.filter(key => !translatedKeys.includes(key));
if(notTranslated.length){
const errorStr = `Some strings not translated in ${lang}\n${notTranslated}`;
console.error(errorStr);
fs.writeFileSync(`src/nls/errors.txt`, errorStr);
}
let translatedStringsJSON = JSON.stringify(translations, null, 2);
let fileToWrite = `${FILE_HEADER}${translatedStringsJSON}${FILE_FOOTER}`;
if(!shallowEqual(translations, localeStringsJS)){
fs.writeFileSync(`src/nls/${lang}/strings.js`, fileToWrite);
}
if(!shallowEqual(updatedLastTranslatedJSON, lastTranslated)){
fs.writeFileSync(`src/nls/${lang}/lastTranslated.json`, JSON.stringify(updatedLastTranslatedJSON, null, 2));
}
}
/**
 * Processes every locale folder under `src/nls`, one after the other.
 *
 * The work is awaited directly so that a failure while listing the folders or while processing a
 * locale rejects the returned promise. Wrapping the loop in `new Promise(async ...)` would leave
 * the promise pending forever on such a failure.
 *
 * @returns {Promise<void>} Resolves once all locales have been processed.
 * @throws {Error} Whatever the folder listing or the processing of a locale throws.
 */
async function translate() {
    console.log("please make sure that core.ai lang translation service credentials are available as env vars.");
    const langs = _getAllNLSFolders();
    console.log(langs);
    // Sequential on purpose: the locales are handled one at a time.
    for(const lang of langs){
        await _processLang(lang);
    }
}
// Expose `translate` to CommonJS consumers of this module.
exports.translate = translate;