From b8b9141a3cfbebc70658f0d3a68439f84be0032e Mon Sep 17 00:00:00 2001 From: Ricardo Felgueiras Date: Thu, 9 Jan 2025 15:04:46 +0000 Subject: [PATCH] feat(linkedin): adapt to the new html --- e2e/__snapshots__/scrappers.test.ts.snap | 20 ++++++++++---------- e2e/linkedin-search-people/index.html | 2 +- src/scrappers/linkedin-my-network.yml | 23 +++++++++++++++++++++++ src/scrappers/linkedin.yml | 16 +++++++--------- 4 files changed, 41 insertions(+), 20 deletions(-) create mode 100644 src/scrappers/linkedin-my-network.yml diff --git a/e2e/__snapshots__/scrappers.test.ts.snap b/e2e/__snapshots__/scrappers.test.ts.snap index 473484da..f8d51810 100644 --- a/e2e/__snapshots__/scrappers.test.ts.snap +++ b/e2e/__snapshots__/scrappers.test.ts.snap @@ -380,16 +380,16 @@ Senior Frontend Developer Spinks Lisbon, Portugal (Hybrid) https://www.linkedin. exports[`RowsX - scrappers tests Scrapping - linkedin-search-people 1`] = ` "Avatar Name Job Location Profile url -=IMAGE("https://media.licdn.com/dms/image/C4E03AQH07GJ8dLm07A/profile-displayphoto-shrink_100_100/0/1582636213281?e=1711584000&v=beta&t=crNJgxo5FVDBzQR0Pberds2L2JEFG62Z7KVIxJhOTO0") Nuno Veloso Head of Engineering at FARFETCH Porto Metropolitan Area https://www.linkedin.com/in/nveloso -=IMAGE("https://media.licdn.com/dms/image/D4D03AQFG6JYq4SkJrA/profile-displayphoto-shrink_100_100/0/1676977343592?e=1711584000&v=beta&t=j8jFUNLFooXPLOSSFH73fYdfnLrSe3ogh0vXerpqz9s") Mariana Gomes Product Design Lead | ex-MetaLab | prev Lead Designer @Rows Porto Metropolitan Area https://www.linkedin.com/in/marianasrgomes -=IMAGE("https://media.licdn.com/dms/image/C4E03AQH2SzQEEmz2dQ/profile-displayphoto-shrink_100_100/0/1608721946513?e=1711584000&v=beta&t=lOZoH48EuRVsktudmQg8yC1oHf2Nm__fLRjZ_2WgeCg") Vania Pinto Correia Head of People at Translucent Porto https://www.linkedin.com/in/vania-pinto-correia -=IMAGE("https://media.licdn.com/dms/image/C5603AQHL5GtkRBMXrQ/profile-displayphoto-shrink_100_100/0/1624445216803?e=1711584000&v=beta&t=BiXzDT4lbNakO8bGoN5wy5OnOXy1EWBiIx3PE3EAtaQ") Álvaro Samagaio Co-Host at Founder Tales Podcast | Growth and Data at Rows | Primus Inter Pares 2021 Winner | Biomedical Eng. Porto Metropolitan Area https://www.linkedin.com/in/alvarosamagaio -=IMAGE("https://media.licdn.com/dms/image/C4E03AQFznf1RlVnaoA/profile-displayphoto-shrink_100_100/0/1631964992025?e=1711584000&v=beta&t=5harvi1GA2pQinrf_aPRxBqaz8ON4J_I-YRIMvPyBAM") Miguel Freitas Product Designer @ Rows Porto https://www.linkedin.com/in/miguelmfreitas - Tiago Cardoso Software Engineer at Rows Portugal https://www.linkedin.com/in/tiagobluemelcardoso -=IMAGE("https://media.licdn.com/dms/image/C4D03AQGVMsRxeiNDuQ/profile-displayphoto-shrink_100_100/0/1516894399474?e=1711584000&v=beta&t=q2Qkm-irKU4qRcAQd3JsFZo86iTEsFzP_7q6ljiG3bM") João Caxias Silva Analytics Engineer Maia https://www.linkedin.com/in/jcaxias -=IMAGE("https://media.licdn.com/dms/image/C4D03AQFN_iKplP0Daw/profile-displayphoto-shrink_100_100/0/1614075170531?e=1711584000&v=beta&t=x-xd_ANe72Zp85-9uykow2wgE4SId-E-XFCnsEixfxo") Bruno Sotto-Mayor Pinto Frontend Developer at Rows Porto https://www.linkedin.com/in/brpinto -=IMAGE("https://media.licdn.com/dms/image/C4E03AQGd7fTteAFuWQ/profile-displayphoto-shrink_100_100/0/1516274284771?e=1711584000&v=beta&t=bFwU54fBF_4ZcGIIURVh8sIoECJyGCYaWU5-cBvOzhI") Rui Maranhao Abreu Research Software Engineer @ Meta & Professor of Software Engineering @ U.Porto San Francisco Bay Area https://www.linkedin.com/in/ruimaranhao -=IMAGE("https://media.licdn.com/dms/image/C4E03AQFNQOVNKdPT6Q/profile-displayphoto-shrink_100_100/0/1517478935400?e=1711584000&v=beta&t=WiOfYeX-v8tyw9hTtSk6VCMOnbBy_YTuoqsEmvLDtpw") Pedro Ferreira Senior Backend Engineer na Rows Porto Metropolitan Area https://www.linkedin.com/in/pedro-ferreira-010982a7" +=IMAGE("https://media.licdn.com/dms/image/v2/D4D03AQFN0ccgRQ2rCw/profile-displayphoto-shrink_100_100/profile-displayphoto-shrink_100_100/0/1722274955089?e=1741824000&v=beta&t=Dn7xKcr3s57c-m5YV3104y-R_dMNI-xGdDO4V1Xli3E") Pedro Pereira Senior Software Engineer - Dapps Porto Metropolitan Area https://www.linkedin.com/in/pedro-pereira-b2b04236 +=IMAGE("https://media.licdn.com/dms/image/v2/D4D03AQFG6JYq4SkJrA/profile-displayphoto-shrink_100_100/profile-displayphoto-shrink_100_100/0/1676977343592?e=1741824000&v=beta&t=vQCX7tTCtmpsEUScxxBuH3Y1yXxZLVZBz05mjmogjz0") Mariana Gomes Lead Product Designer | ex-MetaLab Porto Metropolitan Area https://www.linkedin.com/in/marianasrgomes +=IMAGE("https://media.licdn.com/dms/image/v2/D4D03AQEHMuHHb1HQrw/profile-displayphoto-shrink_100_100/profile-displayphoto-shrink_100_100/0/1724232062313?e=1741824000&v=beta&t=XRf9MSHP_cRRSqBfWTd_l55mnPD0LnnXfUn-72f502U") Tiago Andrade Product Designer at Motion Porto https://www.linkedin.com/in/tiagovandrade +=IMAGE("https://media.licdn.com/dms/image/v2/C4E03AQFCwWGQBw1Vbg/profile-displayphoto-shrink_100_100/profile-displayphoto-shrink_100_100/0/1517695524467?e=1741824000&v=beta&t=aihTsgaD95DEyJNPgLrMAcg9WjtDh5hJxpvj0h9E054") Pedro Trabulo Senior Software Engineer Braga https://www.linkedin.com/in/trabulo +=IMAGE("https://media.licdn.com/dms/image/v2/C4D03AQGVMsRxeiNDuQ/profile-displayphoto-shrink_100_100/profile-displayphoto-shrink_100_100/0/1516894399474?e=1741824000&v=beta&t=ZHteYWgjMzENO-G6fFoB9DRGb4yUYKLBSxJmHnEnNsE") João Caxias Silva Analytics Engineer Maia https://www.linkedin.com/in/jcaxias +=IMAGE("https://media.licdn.com/dms/image/v2/C5603AQG6OyL90-lDLQ/profile-displayphoto-shrink_100_100/profile-displayphoto-shrink_100_100/0/1560353663382?e=1741824000&v=beta&t=ApoQWZr4fh60jY9uOSnMjHEHL1nfk6Mo4AV0v_nVF5Y") Dominik Cholewski Staff Technical Writer at Belvo Porto https://www.linkedin.com/in/dominik-cholewski +=IMAGE("https://media.licdn.com/dms/image/v2/C5603AQFc7aX_mHJE_w/profile-displayphoto-shrink_100_100/profile-displayphoto-shrink_100_100/0/1517723786288?e=1741824000&v=beta&t=QizGcNCDyHVcqewFhoF37T2IZPeYH6FP2Bqh8sIbz1s") Michail Karamanos Principal Engineer - Big Data 🥋 Aveiro https://www.linkedin.com/in/michail-karamanos +=IMAGE("https://media.licdn.com/dms/image/v2/C4E03AQH0D4fEz1b4Nw/profile-displayphoto-shrink_100_100/profile-displayphoto-shrink_100_100/0/1605259221389?e=1741824000&v=beta&t=rA8nLIM-lFOXGO1Yf0tKgCuDje5EhigFvbMxtyKmqoo") Ricardo Gabiola Santamaria Senior Finance Manager Berlin https://www.linkedin.com/in/ricardo-gabiola-santamaria-1335094a +=IMAGE("https://media.licdn.com/dms/image/v2/D4D03AQEWiiAAqIfsGg/profile-displayphoto-shrink_100_100/profile-displayphoto-shrink_100_100/0/1699112682785?e=1741824000&v=beta&t=uQKWuffKCXgAzcMQcViSLgqogI6kK3TYHojWKi50QrI") Victor Botamedi Senior Software Engineer | Tech Lead | Flutter Porto https://www.linkedin.com/in/victor-botamedi +=IMAGE("https://media.licdn.com/dms/image/v2/C4E03AQGd7fTteAFuWQ/profile-displayphoto-shrink_100_100/profile-displayphoto-shrink_100_100/0/1516274284771?e=1741824000&v=beta&t=76nZtAF-ZJPPb7uOYF-kT8LgyfXRpmrMnmOKQW00-0w") Rui Maranhao Abreu Research Software Engineer @ Meta & Professor of Software Engineering @ U.Porto San Francisco Bay Area https://www.linkedin.com/in/ruimaranhao" `; exports[`RowsX - scrappers tests Scrapping - netflix 1`] = ` diff --git a/e2e/linkedin-search-people/index.html b/e2e/linkedin-search-people/index.html index 13d9f0f0..12606e41 100644 --- a/e2e/linkedin-search-people/index.html +++ b/e2e/linkedin-search-people/index.html @@ -1 +1 @@ - + \ No newline at end of file diff --git a/src/scrappers/linkedin-my-network.yml b/src/scrappers/linkedin-my-network.yml new file mode 100644 index 00000000..527a1ead --- /dev/null +++ b/src/scrappers/linkedin-my-network.yml @@ -0,0 +1,23 @@ +url: https://www.linkedin.com/mynetwork/invite-connect/connections/ +header: Linkedin search results +listElementsQuery: '[data-chameleon-result-urn*="urn:li:member:"], .mn-connection-card' +elementParser: + - title: Avatar + query: img + type: image + + - title: Name + query: '.entity-result__title-text > .app-aware-link span[aria-hidden="true"], .mn-connection-card__name' + type: text + + - title: Job + query: .entity-result__primary-subtitle, .mn-connection-card__occupation + type: text + + - title: Location + query: .entity-result__secondary-subtitle + type: text + + - title: Profile url + query: .entity-result__title-text > .app-aware-link, .mn-connection-card__link + type: clean-url diff --git a/src/scrappers/linkedin.yml b/src/scrappers/linkedin.yml index 4c641664..6f77d0f1 100644 --- a/src/scrappers/linkedin.yml +++ b/src/scrappers/linkedin.yml @@ -1,25 +1,23 @@ -url: - - https://www.linkedin.com/search/results/* - - https://www.linkedin.com/mynetwork/invite-connect/connections/ +url: https://www.linkedin.com/search/results/* header: Linkedin search results -listElementsQuery: '[data-chameleon-result-urn*="urn:li:member:"], .mn-connection-card' +listElementsQuery: 'ul[role="list"] > li > div > div > div' elementParser: - title: Avatar - query: img + query: 'div > img' type: image - title: Name - query: '.entity-result__title-text > .app-aware-link span[aria-hidden="true"], .mn-connection-card__name' + query: 'div:nth-child(2) > div > div > div > span > span > a > span > span' type: text - title: Job - query: .entity-result__primary-subtitle, .mn-connection-card__occupation + query: 'div:nth-child(2) > div > div:nth-child(2)' type: text - title: Location - query: .entity-result__secondary-subtitle + query: 'div:nth-child(2) > div > div:nth-child(3)' type: text - title: Profile url - query: .entity-result__title-text > .app-aware-link, .mn-connection-card__link + query: 'div > a' type: clean-url