Skip to content

Commit 9c53acf

Browse files
committed
#48 Remove lyrics header information from the result for Genius provider
1 parent 11c1de9 commit 9c53acf

2 files changed

Lines changed: 665 additions & 620 deletions

File tree

LyricsScraperNET/Providers/Genius/GeniusProvider.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ public sealed class GeniusProvider : ExternalProviderBase
2525
// Format: "artist song". Example: "Parkway Drive Carrion".
2626
private const string GeniusSearchQueryFormat = "{0} {1}";
2727

28+
// XPath for the lyrics header nodes. Note: the header must be excluded from the resulting lyrics.
29+
private const string _lyricsHeaderNodesXPath = "//div[contains(@class, 'LyricsHeader')]";
30+
2831
private const string _referentFragmentNodesXPath = "//a[contains(@class, 'ReferentFragmentVariantdesktop') or contains(@class, 'ReferentFragmentdesktop') or contains(@class, 'ReferentFragment-desktop')]";
2932
private const string _lyricsContainerNodesXPath = "//div[@data-lyrics-container]";
3033

@@ -179,6 +182,12 @@ private string GetParsedLyricFromHtmlPageBody(string htmlPageBody, out bool inst
179182
foreach (HtmlNode spanNode in spanNodes)
180183
spanNode.Remove();
181184

185+
// The lyrics header nodes carry additional (non-lyric) information, so remove them from the document.
186+
var lyricsHeaderNodes = htmlDocument.DocumentNode.SelectNodes(_lyricsHeaderNodesXPath);
187+
if (lyricsHeaderNodes != null)
188+
foreach (HtmlNode headerNode in lyricsHeaderNodes)
189+
headerNode.Remove();
190+
182191
var lyricNodes = htmlDocument.DocumentNode.SelectNodes(_lyricsContainerNodesXPath);
183192
if (lyricNodes == null)
184193
{

0 commit comments

Comments
 (0)