Skip to content

Commit 0458d6c

Browse files
committed
Added "Page Not Found" check for LyricsFreak provider. Fixed searching for some cases. Added Integration test.
1 parent 077e8d2 commit 0458d6c

5 files changed

Lines changed: 50 additions & 11 deletions

File tree

LyricsScraperNET/Providers/LyricsFreak/LyricsFreakProvider.cs

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,12 @@ internal class LyricsFreakProvider : ExternalProviderBase
1616
{
1717
private ILogger<LyricsFreakProvider>? _logger;
1818
private readonly IExternalUriConverter _uriConverter;
19-
private readonly string LyricsHrefXPath = "//a[translate(@title, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = '{0} lyrics']";
19+
20+
private const string LyricsHrefXPath = "//a[contains(translate(@title, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '{0} lyrics')]";
2021
private const string LyricsDivXPath = "//div[@data-container-id='lyrics']";
2122

23+
private const string PageNotFoundText = "#404 - Page Not Found";
24+
2225
public override IExternalProviderOptions Options { get; }
2326

2427
#region Constructors
@@ -90,11 +93,17 @@ protected override async Task<SearchResult> SearchLyricAsync(string artist, stri
9093

9194
cancellationToken.ThrowIfCancellationRequested();
9295

96+
if (htmlResponse.Contains(PageNotFoundText))
97+
{
98+
_logger?.LogWarning($"LyricsFreak. Artist's page not found (404). [{artist}]. Song name: [{song}]");
99+
return new SearchResult(Models.ExternalProviderType.LyricsFreak);
100+
}
101+
93102
// 2. Find song on the artist page and get link to the web page.
94103
var songHref = GetSongHrefFromHtmlBody(htmlResponse, song);
95104
if (string.IsNullOrEmpty(songHref))
96105
{
97-
_logger?.LogWarning($"LyricsFreak. Can't find song Uri for song: [{song}]");
106+
_logger?.LogWarning($"LyricsFreak. Can't find song Uri for artist: [{artist}]. Song name: [{song}]");
98107
return new SearchResult(Models.ExternalProviderType.LyricsFreak);
99108
}
100109
var songUri = new Uri(LyricsFreakUriConverter.BaseUrl + songHref);
@@ -104,14 +113,14 @@ protected override async Task<SearchResult> SearchLyricAsync(string artist, stri
104113

105114
protected async override Task<SearchResult> SearchLyricAsync(Uri uri, CancellationToken cancellationToken = default)
106115
{
107-
var text = await WebClient.LoadAsync(uri, cancellationToken);
116+
var htmlBodyContent = await WebClient.LoadAsync(uri, cancellationToken);
108117

109118
cancellationToken.ThrowIfCancellationRequested();
110119

111-
var songLyrics = GetSongLyricsFromHtmlBody(text);
120+
var songLyrics = GetSongLyricsFromHtmlBody(htmlBodyContent);
112121
if (string.IsNullOrEmpty(songLyrics))
113122
{
114-
_logger?.LogWarning($"LyricsFreak. Can't find song lyrics for song uri: [{uri.AbsoluteUri}]");
123+
_logger?.LogWarning($"LyricsFreak. Can't find lyrics for song's uri: [{uri}]");
115124
return new SearchResult(Models.ExternalProviderType.LyricsFreak);
116125
}
117126

@@ -126,12 +135,20 @@ protected async override Task<SearchResult> SearchLyricAsync(Uri uri, Cancellati
126135

127136
private string GetSongHrefFromHtmlBody(string htmlBody, string song)
128137
{
138+
// Encoding is needed for songs like "Devil's Calling". The title in htmlBody will be: "Devil&#039;s Calling Lyrics"
129139
string formattedXPath = string.Format(LyricsHrefXPath, GetEncodedSong(song));
130-
var linkNode = htmlBody.SelectSingleNodeByXPath(formattedXPath);
140+
141+
// Otherwise, fall back to a lowercase search using the original (unencoded) song name.
142+
// Example. Artist: "Zé Ramalho". Song: "Batendo Na Porta Do Céu (Versão II)"
143+
string originalXPath = string.Format(LyricsHrefXPath, song.ToLowerInvariant());
144+
145+
var linkNode = htmlBody.SelectSingleNodeByXPath(formattedXPath)
146+
?? (!song.Contains("'")
147+
? htmlBody.SelectSingleNodeByXPath(originalXPath)
148+
: null);
149+
131150
if (linkNode == null)
132-
{
133151
return string.Empty;
134-
}
135152

136153
string hrefSong = linkNode.GetAttributeValue("href", string.Empty);
137154
return hrefSong;
@@ -140,11 +157,10 @@ private string GetSongHrefFromHtmlBody(string htmlBody, string song)
140157
private string GetSongLyricsFromHtmlBody(string htmlBody)
141158
{
142159
var lyricsNode = htmlBody.SelectSingleNodeByXPath(LyricsDivXPath);
160+
143161
if (lyricsNode == null)
144-
{
145162
return string.Empty;
146163

147-
}
148164
string lyricsText = lyricsNode.InnerText.Trim();
149165
return lyricsText;
150166
}

LyricsScraperNET/Providers/LyricsFreak/LyricsFreakUriConverter.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ public Uri GetArtistUri(string artist)
1717
{
1818
var artistFormatted = artist.ToLowerInvariant().СonvertToPlusFormat(removeProhibitedSymbols: true);
1919
return new Uri(string.Format(uriArtistPathFormat, artistFormatted.First(c => c != '+'), artistFormatted));
20-
2120
}
2221

2322
public Uri GetLyricUri(string artist, string song)

Tests/LyricsScraperNET.IntegrationTest/LyricsScraperNET.IntegrationTest.csproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@
5656
<None Update="Providers\LyricsFreak\Resources\Lyrics_Result_01.txt">
5757
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
5858
</None>
59+
<None Update="Providers\LyricsFreak\Resources\Lyrics_Result_03.txt">
60+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
61+
</None>
5962
<None Update="Providers\LyricsFreak\Resources\Lyrics_Result_02.txt">
6063
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
6164
</None>
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
Mãe, tire essas algemas de mim
2+
Me proteja com o seu véyou
3+
Está escuro demais pra ver
4+
Me sinto até batendo na porta do céyou
5+
6+
Bate, Bate, Bate na porta do céyou
7+
Bate, Bate, Bate na porta do céyou
8+
9+
Mãe, tire essas armas pra mim
10+
A camisa e o chapéyou
11+
A grande nuvem escura já me envolveu
12+
Me sinto até batendo na porta do céyou
13+
14+
Bate, bate, bate, bate na porta do céyou
15+
Bate, bate, bate na porta do céyou

Tests/LyricsScraperNET.IntegrationTest/Providers/LyricsFreak/lyric_test_data.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111
"SongName": "Wishing Wells",
1212
"SongUri": null
1313
},
14+
{
15+
"LyricResultPath": "Providers/LyricsFreak/Resources/Lyrics_Result_03.txt",
16+
"ArtistName": "Zé Ramalho",
17+
"SongName": "Batendo Na Porta Do Céu (Versão II)",
18+
"SongUri": null
19+
},
1420
{
1521
"LyricResultPath": "Providers/LyricsFreak/Resources/Lyrics_Result_01.txt",
1622
"ArtistName": null,

0 commit comments

Comments
 (0)