Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
363 changes: 362 additions & 1 deletion modules/instagram.js
Original file line number Diff line number Diff line change
Expand Up @@ -500,4 +500,365 @@ function extractEmbeddedInstagramJSON(response) {
}

return datas;
}
}

// === auto-generated by 4cat map_item sync — BLOCK REPLACED AUTOMATICALLY ===
// (regenerated from datasources/instagram/search_instagram.py)
/**
 * Map one captured Instagram item to a MappedItem.
 *
 * The response flavour is chosen from `item.__typename`:
 *  - a type name containing "polaris" (case-insensitive) goes to parse_polaris_item;
 *  - any other type name except 'XDTMediaDict' goes to parse_graph_item;
 *  - everything else (no type name, or 'XDTMediaDict') goes to parse_itemlist_item.
 *
 * @param {object} item - Raw item as captured.
 * @returns {MappedItem} The parsed item wrapped in a MappedItem.
 * @throws {MapItemException} When the item looks like an ad (product_type 'ad'
 *     or a facebook.com ad-redirect link), or when the selected parser rejects it.
 */
export function map_item(item) {
    const link = item.link ?? '';
    const is_ad_redirect = typeof link === 'string'
        && link.startsWith('https://www.facebook.com/ads/ig_redirect');
    if (item.product_type === 'ad' || is_ad_redirect) {
        throw new MapItemException('appears to be Instagram ad, check raw data to confirm and ensure Zeeschuimer is up to date.');
    }

    const has_typename = '__typename' in item;
    // Substring test must use String#includes: the `in` operator throws a
    // TypeError when its right-hand side is a string.
    const is_polaris_response = has_typename
        && String(item.__typename).toLowerCase().includes('polaris');
    const is_graph_response = has_typename && item.__typename !== 'XDTMediaDict';

    if (is_polaris_response) {
        return new MappedItem(parse_polaris_item(item));
    }
    if (is_graph_response) {
        return new MappedItem(parse_graph_item(item));
    }
    return new MappedItem(parse_itemlist_item(item));
}

/**
 * Build the flat mapped record for a "polaris" response item.
 *
 * Fields this function does not read from the node (timestamps, like/comment
 * counts, location, co-authors, user tags) are emitted as MissingMappedField
 * placeholders.
 *
 * @param {object} node - Raw polaris item.
 * @returns {object} Plain object of mapped fields.
 * @throws {MapItemException} When both `user` and `owner` are present and
 *     disagree on both id and username.
 */
function parse_polaris_item(node) {
    const partial_item = node._zs_partial ?? false;
    const collected_at = new MissingMappedField(0);
    const unix_at = new MissingMappedField(0);

    // No `caption` key -> missing field; key present but null/without text -> empty string
    const caption = 'caption' in node
        ? (node.caption?.text ?? '')
        : new MissingMappedField('');

    const user = node.user ?? {};
    const owner = node.owner ?? {};
    // Only compare when both objects actually carry data; an absent owner/user
    // (defaulted to {}) must not be treated as a conflicting account.
    const has_user = Object.keys(user).length > 0;
    const has_owner = Object.keys(owner).length > 0;
    if (has_user && has_owner && owner.id !== user.id && user.username !== owner.username) {
        throw new MapItemException('Unable to parse item: different user and owner');
    }
    const is_verified = 'is_verified' in user ? user.is_verified : new MissingMappedField(false);

    // media type
    const type_map = { XIGPolarisPhotoMedia: 'photo', XIGPolarisVideoMedia: 'video' };
    const media_type = type_map[node.__typename] ?? 'unknown';
    const num_media = node.__typename !== 'XIGPolarisCarouselMedia' ? 1 : node.carousel_media.length;

    // media urls: first listed video version, if any
    const display_urls = node.display_uri ?? new MissingMappedField('');
    const media_urls = node.video_versions?.[0]?.url ?? new MissingMappedField('');

    // Collect the capture group of every match (tag text without the leading
    // '#'), comma-joined; empty string when there is no textual caption.
    // NOTE(review): this pattern was reconstructed from a literal that was
    // split across several lines; the `\s` and `\[\]` entries are assumed --
    // confirm against datasources/instagram/search_instagram.py.
    const hashtag_pattern = /#([^\s!@#$%^&*()_+{}:"|<>?\[\];',.\/`~]+)/g;
    const hashtags = typeof caption === 'string'
        ? Array.from(caption.matchAll(hashtag_pattern), (found) => found[1]).join(',')
        : '';

    return {
        collected_from_url: normalize_url_encoding(node.__import_meta?.source_platform_url),
        collected_from_view: node._zs_instagram_view ?? '',
        partial_item: partial_item,
        id: node.code,
        timestamp: collected_at,
        thread_id: node.code,
        parent_id: node.code,
        url: 'https://www.instagram.com/p/' + node.code,
        body: caption,

        author_id: user.id ?? owner.id ?? new MissingMappedField(''),
        author: user.username ?? owner.username ?? new MissingMappedField(''),
        author_fullname: user.full_name ?? owner.full_name ?? new MissingMappedField(''),
        verified: is_verified,
        author_avatar_url: user.profile_pic_url ?? owner.profile_pic_url ?? new MissingMappedField(''),

        coauthors: new MissingMappedField(''),
        coauthor_fullnames: new MissingMappedField(''),
        coauthor_ids: new MissingMappedField(''),

        media_type: media_type,
        num_media: num_media,
        image_urls: display_urls,
        media_urls: media_urls,

        hashtags: hashtags,
        usertags: new MissingMappedField(''),
        play_count: node.play_count ?? new MissingMappedField(0),

        likes_hidden: new MissingMappedField(''),
        num_likes: new MissingMappedField(0),
        num_comments: new MissingMappedField(0),

        location_name: new MissingMappedField(''),
        location_id: new MissingMappedField(''),
        location_latlong: new MissingMappedField(''),
        location_city: new MissingMappedField(''),

        unix_timestamp: unix_at,
        missing_media: null,
    };
}

/**
 * Build the flat mapped record for a "graph" response item
 * (typenames such as GraphImage, GraphVideo, GraphSidecar).
 *
 * For a GraphSidecar the media URL is taken from the first child; the media
 * type is 'mixed' when the children have more than one distinct typename.
 *
 * @param {object} node - Raw graph item.
 * @returns {object} Plain object of mapped fields.
 * @throws {MapItemException} When both `user` and `owner` are present and
 *     disagree on both id and username.
 */
function parse_graph_item(node) {
    const caption = node.edge_media_to_caption?.edges?.[0]?.node?.text ?? new MissingMappedField('');

    const is_sidecar = node.__typename === 'GraphSidecar';
    const num_media = is_sidecar ? node.edge_sidecar_to_children.edges.length : 1;
    const media_node = is_sidecar ? node.edge_sidecar_to_children.edges[0].node : node;

    let media_url;
    if (media_node.__typename === 'GraphVideo') {
        media_url = media_node.video_url;
    } else if (media_node.__typename === 'GraphImage') {
        // Use the last listed resource, read without mutating the input array;
        // fall back to display_url when no resource is available.
        const resources = media_node.display_resources ?? media_node.thumbnail_resources ?? [];
        const last_resource = resources[resources.length - 1];
        media_url = last_resource?.src ?? media_node.display_url ?? '';
    } else {
        media_url = media_node.display_url;
    }

    // NOTE(review): 'GraphImage' has no entry here, so single images (and
    // image-only sidecars) map to 'unknown' -- confirm whether that is intended
    // in the Python source this block is generated from.
    const type_map = { GraphSidecar: 'photo', GraphVideo: 'video' };
    let media_type;
    if (!is_sidecar) {
        media_type = type_map[node.__typename] ?? 'unknown';
    } else {
        const child_types = new Set(node.edge_sidecar_to_children.edges.map((edge) => edge.node.__typename));
        media_type = child_types.size > 1
            ? 'mixed'
            : (type_map[child_types.values().next().value] ?? 'unknown');
    }

    const location = { name: '', latlong: '', city: '', location_id: '' };
    if (node.location) {
        location.name = node.location.name;
        location.location_id = node.location.pk;
        location.latlong = node.location.lat && node.location.lng ? `${node.location.lat},${node.location.lng}` : '';
        location.city = node.location.city;
    }

    const no_likes = Boolean(node.like_and_view_counts_disabled);

    const user = node.user ?? {};
    const owner = node.owner ?? {};
    // Only compare when both objects actually carry data; an absent owner/user
    // (defaulted to {}) must not be treated as a conflicting account.
    const has_user = Object.keys(user).length > 0;
    const has_owner = Object.keys(owner).length > 0;
    if (has_user && has_owner && owner.id !== user.id && user.username !== owner.username) {
        throw new MapItemException('Unable to parse item: different user and owner');
    }

    // view_count takes precedence over play_count; null is treated like absent
    const play_count = node.view_count ?? node.play_count ?? new MissingMappedField(0);

    // Collect the capture group of every match (tag text without the leading
    // '#'), comma-joined; empty string when there is no textual caption.
    // NOTE(review): this pattern was reconstructed from a literal that was
    // split across several lines; the `\s` and `\[\]` entries are assumed --
    // confirm against datasources/instagram/search_instagram.py.
    const hashtag_pattern = /#([^\s!@#$%^&*()_+{}:"|<>?\[\];',.\/`~]+)/g;
    const hashtags = typeof caption === 'string'
        ? Array.from(caption.matchAll(hashtag_pattern), (found) => found[1]).join(',')
        : '';

    const tagged_edges = node.edge_media_to_tagged_user?.edges ?? [];

    return {
        id: node.shortcode,
        post_source_domain: normalize_url_encoding(node.__import_meta?.source_platform_url),
        collected_from_view: node._zs_instagram_view ?? '',
        partial_item: node._zs_partial ?? '',
        timestamp: new Date(node.taken_at_timestamp * 1000).toISOString(),
        thread_id: node.shortcode,
        parent_id: node.shortcode,
        url: 'https://www.instagram.com/p/' + node.shortcode,
        body: caption,

        author: user.username ?? owner.username ?? new MissingMappedField(''),
        author_fullname: user.full_name ?? owner.full_name ?? new MissingMappedField(''),
        is_verified: Boolean(user.is_verified),
        author_avatar_url: user.profile_pic_url ?? owner.profile_pic_url ?? new MissingMappedField(''),

        coauthors: new MissingMappedField(''),
        coauthor_fullnames: new MissingMappedField(''),
        coauthor_ids: new MissingMappedField(''),

        media_type: media_type,
        num_media: num_media,
        image_urls: node.display_url,
        media_urls: media_url,

        hashtags: hashtags,
        usertags: tagged_edges.map((edge) => edge.node.user.username).join(','),
        play_count: play_count,
        likes_hidden: no_likes ? 'yes' : 'no',
        num_likes: no_likes
            ? new MissingMappedField(0)
            : (node.edge_media_preview_like?.count ?? new MissingMappedField(0)),
        num_comments: node.edge_media_preview_comment?.count ?? 0,

        location_name: location.name,
        location_id: location.location_id,
        location_latlong: location.latlong,
        location_city: location.city,

        unix_timestamp: node.taken_at_timestamp,
        missing_media: null,
    };
}

/**
 * Build the flat mapped record for an "item list" response item
 * (numeric `media_type`: 1 is mapped to 'photo', 2 to 'video', 8 is treated
 * as a carousel whose entries live in `carousel_media`).
 *
 * @param {object} node - Raw item.
 * @returns {object} Plain object of mapped fields.
 * @throws {MapItemException} When a video entry of a non-partial item lacks
 *     the expected image/video versions, or when both `user` and `owner` are
 *     present and disagree on both id and username.
 */
function parse_itemlist_item(node) {
    const partial_item = node._zs_partial ?? false;
    const is_carousel = node.media_type === 8;
    const num_media = is_carousel ? node.carousel_media.length : 1;

    // No `caption` key -> missing field; key present but null/without text -> empty string
    const caption = 'caption' in node
        ? (node.caption?.text ?? '')
        : new MissingMappedField('');

    const display_urls = [];
    const media_urls = [];
    let missing_media = null;
    const type_map = { 1: 'photo', 2: 'video' };
    const media_types = new Set();
    const media_nodes = is_carousel ? node.carousel_media : [node];

    // A video entry without the expected versions is tolerated only for
    // items flagged as partial.
    const require_partial = () => {
        if (!partial_item) {
            throw new MapItemException('Instagram item format change');
        }
    };

    for (const media_node of media_nodes) {
        if (media_node.media_type === 2) {
            // display url: prefer the still image, fall back to the video itself
            if ('image_versions2' in media_node) {
                display_urls.push(media_node.image_versions2.candidates[0].url);
            } else if ('video_versions' in media_node) {
                display_urls.push(media_node.video_versions[0].url);
            } else {
                require_partial();
            }

            if ('video_versions' in media_node) {
                media_urls.push(media_node.video_versions[0].url);
            } else {
                require_partial();
            }
        } else if (media_node.media_type === 1 && media_node.image_versions2) {
            const media_url = media_node.image_versions2.candidates[0].url;
            display_urls.push(media_url);
            media_urls.push(media_url);
        } else {
            missing_media = new MissingMappedField('');
        }

        media_types.add(type_map[media_node.media_type] ?? 'unknown');
    }

    // 'unknown' covers the case of an empty media list
    const media_type = media_types.size > 1
        ? 'mixed'
        : (media_types.values().next().value ?? 'unknown');

    let num_comments;
    if ('comment_count' in node) {
        num_comments = node.comment_count;
    } else if (Array.isArray(node.comments)) {
        num_comments = node.comments.length;
    } else {
        num_comments = -1;
    }

    const location = { name: '', latlong: '', city: '', location_id: '' };
    if (node.location) {
        location.name = node.location.name;
        location.location_id = node.location.pk;
        location.latlong = node.location.lat && node.location.lng ? `${node.location.lat},${node.location.lng}` : '';
        location.city = node.location.city;
    }

    const user = node.user ?? {};
    const owner = node.owner ?? {};
    // Only compare when both objects actually carry data; an absent owner/user
    // (defaulted to {}) must not be treated as a conflicting account.
    const has_user = Object.keys(user).length > 0;
    const has_owner = Object.keys(owner).length > 0;
    if (has_user && has_owner && owner.id !== user.id && user.username !== owner.username) {
        throw new MapItemException('Unable to parse item: different user and owner');
    }

    const coauthor_nodes = node.coauthor_producers || [];
    const coauthors = coauthor_nodes.map((c) => c.username ?? new MissingMappedField('')).join(',');
    const coauthor_fullnames = coauthor_nodes.map((c) => c.full_name ?? new MissingMappedField('')).join(',');
    const coauthor_ids = coauthor_nodes.map((c) => c.id).join(',');

    const no_likes = Boolean(node.like_and_view_counts_disabled);

    // view_count takes precedence over play_count; null is treated like absent
    const play_count = node.view_count ?? node.play_count ?? new MissingMappedField(0);

    // `usertags` may be absent or null; either way there is nothing to list
    const usertags = (node.usertags?.in ?? []).map((tag) => tag.user.username).join(',');

    const collected_at = partial_item ? new MissingMappedField(0) : new Date(node.taken_at * 1000).toISOString();
    const unix_at = partial_item ? new MissingMappedField(0) : node.taken_at;

    // Collect the capture group of every match (tag text without the leading
    // '#'), comma-joined; empty string when there is no textual caption.
    // NOTE(review): this pattern was reconstructed from a literal that was
    // split across several lines; the `\s` and `\[\]` entries are assumed --
    // confirm against datasources/instagram/search_instagram.py.
    const hashtag_pattern = /#([^\s!@#$%^&*()_+{}:"|<>?\[\];',.\/`~]+)/g;
    const hashtags = typeof caption === 'string'
        ? Array.from(caption.matchAll(hashtag_pattern), (found) => found[1]).join(',')
        : '';

    return {
        collected_from_url: normalize_url_encoding(node.__import_meta?.source_platform_url),
        collected_from_view: node._zs_instagram_view ?? '',
        partial_item: node._zs_partial ?? '',
        id: node.code,
        timestamp: collected_at,
        thread_id: node.code,
        parent_id: node.code,
        url: 'https://www.instagram.com/p/' + node.code,
        body: caption,

        author_id: user.id ?? owner.id ?? new MissingMappedField(''),
        author: user.username ?? owner.username ?? new MissingMappedField(''),
        author_fullname: user.full_name ?? owner.full_name ?? new MissingMappedField(''),
        verified: Boolean(user.is_verified),
        author_avatar_url: user.profile_pic_url ?? owner.profile_pic_url ?? new MissingMappedField(''),

        coauthors: coauthors,
        coauthor_fullnames: coauthor_fullnames,
        coauthor_ids: coauthor_ids,

        media_type: media_type,
        num_media: num_media,
        image_urls: display_urls.join(','),
        media_urls: media_urls.join(','),

        hashtags: hashtags,
        usertags: usertags,
        play_count: play_count,
        likes_hidden: no_likes ? 'yes' : 'no',
        num_likes: no_likes ? new MissingMappedField(0) : node.like_count,
        num_comments: num_comments,

        location_name: location.name,
        location_id: location.location_id,
        location_latlong: location.latlong,
        location_city: location.city,

        unix_timestamp: unix_at,
        missing_media: missing_media,
    };
}
// === end auto-generated ===