Skip to content

Commit 589db98

Browse files
committed
fix(connectors): address audit findings across 7 connectors
- github: branches with slashes are encoded per-segment in sourceUrl; treat 403 Contents API responses (>100MB or restricted) as null instead of throwing; fetchBlobContent throws on unexpected encoding rather than returning empty content
- jira: ADF extractor handles hardBreak, mention, and emoji nodes
- google-docs: paragraph join uses newline so heading/body boundaries are preserved
- servicenow: drop legacy 'wiki' field from kb_knowledge requests; only the 'text' field is reliably present on modern instances
- salesforce: KnowledgeArticleVersion query adds IsLatestVersion=true to avoid duplicate historical versions; encode externalId in sObject GET
- slack: subtype filter switches to a denylist (SLACK_NOISE_SUBTYPES) so thread_broadcast / me_message / reminder_add are kept; reply_count folded into contentHash to detect threaded reply deletes
- confluence: drop bare-string cursor fallback; unparseable cursors now restart the listing instead of silently re-listing blogposts from 0
1 parent 265071c commit 589db98

7 files changed

Lines changed: 72 additions & 16 deletions

File tree

apps/sim/connectors/confluence/confluence.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,11 @@ async function listAllContentTypes(
489489
pagesDone = parsed.pagesDone === true
490490
blogsDone = parsed.blogsDone === true
491491
} catch {
492-
pageCursor = cursor
492+
/**
493+
* Older bare-string cursors are no longer emitted; fall through and
494+
* restart instead of silently re-listing blogposts from page 0.
495+
*/
496+
logger.warn('Ignoring unparseable Confluence cursor; restarting listing')
493497
}
494498
}
495499

apps/sim/connectors/github/github.ts

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -138,10 +138,12 @@ async function fetchBlobContent(
138138
if (isBinaryBuffer(buf)) return null
139139
return buf.toString('utf8')
140140
}
141-
if (encoding === 'utf-8') {
142-
return content
143-
}
144-
return ''
141+
/**
142+
* Per https://docs.github.com/en/rest/git/blobs the Blobs API only ever
143+
* returns base64. Refuse to silently persist empty content for an
144+
* unexpected encoding so a sync surfaces the error instead.
145+
*/
146+
throw new Error(`Unexpected git blob encoding for ${sha}: ${encoding ?? 'undefined'}`)
145147
}
146148

147149
/**
@@ -162,7 +164,7 @@ function treeItemToStub(
162164
content: '',
163165
contentDeferred: true,
164166
mimeType: 'text/plain',
165-
sourceUrl: `https://github.com/${owner}/${repo}/blob/${encodeURIComponent(branch)}/${item.path.split('/').map(encodeURIComponent).join('/')}`,
167+
sourceUrl: `https://github.com/${owner}/${repo}/blob/${branch.split('/').map(encodeURIComponent).join('/')}/${item.path.split('/').map(encodeURIComponent).join('/')}`,
166168
contentHash: `${GIT_SHA_PREFIX}${item.sha}`,
167169
metadata: {
168170
path: item.path,
@@ -307,6 +309,13 @@ export const githubConnector: ConnectorConfig = {
307309

308310
if (!response.ok) {
309311
if (response.status === 404) return null
312+
if (response.status === 403) {
313+
logger.info('Skipping GitHub file rejected by Contents API', {
314+
path,
315+
status: response.status,
316+
})
317+
return null
318+
}
310319
throw new Error(`Failed to fetch file ${path}: ${response.status}`)
311320
}
312321

@@ -350,7 +359,7 @@ export const githubConnector: ConnectorConfig = {
350359
content,
351360
contentDeferred: false,
352361
mimeType: 'text/plain',
353-
sourceUrl: `https://github.com/${owner}/${repo}/blob/${encodeURIComponent(branch)}/${path.split('/').map(encodeURIComponent).join('/')}`,
362+
sourceUrl: `https://github.com/${owner}/${repo}/blob/${branch.split('/').map(encodeURIComponent).join('/')}/${path.split('/').map(encodeURIComponent).join('/')}`,
354363
contentHash: `${GIT_SHA_PREFIX}${data.sha as string}`,
355364
metadata: {
356365
path,

apps/sim/connectors/google-docs/google-docs.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ function extractTextFromDocsBody(doc: DocsDocument): string {
9191
}
9292
}
9393

94-
return parts.join('').trim()
94+
return parts.join('\n').trim()
9595
}
9696

9797
/**

apps/sim/connectors/salesforce/salesforce.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ const OBJECT_FIELDS: Record<string, string[]> = {
4242

4343
/** SOQL WHERE clause additions per object type. */
4444
const OBJECT_WHERE: Record<string, string> = {
45-
KnowledgeArticleVersion: " WHERE PublishStatus='Online' AND Language='en_US'",
45+
KnowledgeArticleVersion:
46+
" WHERE PublishStatus='Online' AND IsLatestVersion=true AND Language='en_US'",
4647
} as const
4748

4849
/**
@@ -389,7 +390,7 @@ export const salesforceConnector: ConnectorConfig = {
389390
instanceUrl = await resolveInstanceUrl(accessToken, syncContext)
390391
}
391392

392-
const url = `${instanceUrl}sobjects/${objectType}/${externalId}?fields=${fields.join(',')}`
393+
const url = `${instanceUrl}sobjects/${objectType}/${encodeURIComponent(externalId)}?fields=${fields.join(',')}`
393394

394395
const response = await fetchWithRetry(url, {
395396
method: 'GET',

apps/sim/connectors/servicenow/servicenow.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ interface ServiceNowRecord {
3636
interface KBArticle extends ServiceNowRecord {
3737
short_description?: string
3838
text?: string
39-
wiki?: string
4039
workflow_state?: string
4140
kb_category?: string | { display_value?: string }
4241
kb_knowledge_base?: string | { display_value?: string }
@@ -264,7 +263,7 @@ function priorityLabel(priority: string | undefined): string {
264263
*/
265264
function kbArticleToDocument(article: KBArticle, instanceUrl: string): ExternalDocument {
266265
const title = rawValue(article.short_description) || rawValue(article.number) || article.sys_id
267-
const articleText = rawValue(article.text) || rawValue(article.wiki) || ''
266+
const articleText = rawValue(article.text) || ''
268267
const content = htmlToPlainText(articleText)
269268
const sysId = rawValue(article.sys_id) || article.sys_id
270269
const updatedOn = rawValue(article.sys_updated_on) || ''
@@ -549,7 +548,7 @@ export const servicenowConnector: ConnectorConfig = {
549548
const query = isKB ? buildKBQuery(sourceConfig) : buildIncidentQuery(sourceConfig)
550549

551550
const fields = isKB
552-
? 'sys_id,short_description,text,wiki,workflow_state,kb_category,kb_knowledge_base,number,author,sys_created_by,sys_updated_by,sys_updated_on,sys_created_on'
551+
? 'sys_id,short_description,text,workflow_state,kb_category,kb_knowledge_base,number,author,sys_created_by,sys_updated_by,sys_updated_on,sys_created_on'
553552
: 'sys_id,number,short_description,description,state,priority,category,assigned_to,opened_by,close_notes,resolution_notes,sys_created_by,sys_updated_by,sys_updated_on,sys_created_on'
554553

555554
const params: Record<string, string> = {
@@ -625,7 +624,7 @@ export const servicenowConnector: ConnectorConfig = {
625624
}
626625

627626
const fields = isKB
628-
? 'sys_id,short_description,text,wiki,workflow_state,kb_category,kb_knowledge_base,number,author,sys_created_by,sys_updated_by,sys_updated_on,sys_created_on'
627+
? 'sys_id,short_description,text,workflow_state,kb_category,kb_knowledge_base,number,author,sys_created_by,sys_updated_by,sys_updated_on,sys_created_on'
629628
: 'sys_id,number,short_description,description,state,priority,category,assigned_to,opened_by,close_notes,resolution_notes,sys_created_by,sys_updated_by,sys_updated_on,sys_created_on'
630629

631630
const instanceUrl = resolveServiceNowInstanceUrl(sourceConfig.instanceUrl as string)

apps/sim/connectors/slack/slack.ts

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,33 @@ const SLACK_API_BASE = 'https://slack.com/api'
1111
const DEFAULT_MAX_MESSAGES = 1000
1212
const MESSAGES_PER_PAGE = 200
1313

14+
/**
15+
* Message subtypes that carry no user-authored text (channel events, bot
16+
* lifecycle, etc.). Per https://api.slack.com/events/message every other
17+
* subtype — `bot_message`, `file_share`, `me_message`, `thread_broadcast`,
18+
* `reminder_add`, `file_comment`, etc. — can carry meaningful content.
19+
*/
20+
const SLACK_NOISE_SUBTYPES = new Set([
21+
'channel_join',
22+
'channel_leave',
23+
'channel_topic',
24+
'channel_purpose',
25+
'channel_name',
26+
'channel_archive',
27+
'channel_unarchive',
28+
'group_join',
29+
'group_leave',
30+
'group_topic',
31+
'group_purpose',
32+
'group_name',
33+
'group_archive',
34+
'group_unarchive',
35+
'pinned_item',
36+
'unpinned_item',
37+
'bot_add',
38+
'bot_remove',
39+
])
40+
1441
interface SlackMessage {
1542
type: string
1643
user?: string
@@ -187,7 +214,13 @@ async function formatMessages(
187214
for (const msg of chronological) {
188215
// Skip non-user messages (join/leave, bot messages without text, etc.)
189216
if (!msg.text) continue
190-
if (msg.subtype && msg.subtype !== 'bot_message' && msg.subtype !== 'file_share') continue
217+
/**
218+
* Drop only known noise subtypes (channel join/leave/topic events,
219+
* bot add/remove, etc.). Per https://api.slack.com/events/message any
220+
* subtype with user-authored text — `thread_broadcast`, `me_message`,
221+
* `bot_message`, `file_share`, `reminder_add`, etc. — should be kept.
222+
*/
223+
if (msg.subtype && SLACK_NOISE_SUBTYPES.has(msg.subtype)) continue
191224

192225
const timestamp = formatSlackTimestamp(msg.ts)
193226
const userName = msg.user
@@ -314,12 +347,19 @@ async function buildSlackChannelDocument(
314347
*/
315348
let maxEditTs = ''
316349
let maxReplyTs = ''
350+
let totalReplies = 0
317351
for (const m of messages) {
318352
if (m.edited?.ts && m.edited.ts > maxEditTs) maxEditTs = m.edited.ts
319353
if (m.latest_reply && m.latest_reply > maxReplyTs) maxReplyTs = m.latest_reply
354+
if (typeof m.reply_count === 'number') totalReplies += m.reply_count
320355
}
321356

322-
const contentHash = `slack:${channel.id}:${oldestTs ?? 'empty'}:${lastActivityTs ?? 'empty'}:${messageCount}:${maxEditTs || 'noedit'}:${maxReplyTs || 'noreply'}`
357+
/**
358+
* `latest_reply` alone misses reply edits and deletes. Folding `reply_count`
359+
* in catches deletes (count drops) but still cannot detect reply edits
360+
* without fetching `conversations.replies` for each parent.
361+
*/
362+
const contentHash = `slack:${channel.id}:${oldestTs ?? 'empty'}:${lastActivityTs ?? 'empty'}:${messageCount}:${maxEditTs || 'noedit'}:${maxReplyTs || 'noreply'}:${totalReplies}`
323363

324364
return { content, contentHash, messageCount, lastActivityTs }
325365
}

apps/sim/tools/jira/utils.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ export function extractAdfText(content: any): string | null {
6262
return content.map(extractAdfText).filter(Boolean).join(' ')
6363
}
6464
if (content.type === 'text') return content.text || ''
65+
if (content.type === 'hardBreak') return '\n'
66+
if (content.type === 'mention') return content.attrs?.text || ''
67+
if (content.type === 'emoji') return content.attrs?.shortName || content.attrs?.text || ''
6568
if (content.content) return extractAdfText(content.content)
6669
return ''
6770
}

0 commit comments

Comments
 (0)