If we can't pull the robots information, don't break completely (#37)

This commit is contained in:
Gervasio Marchand 2023-02-04 18:29:44 -03:00 committed by GitHub
parent 32280c334b
commit 91ac0e2385
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -41,7 +41,14 @@ await Parallel.ForEachAsync(Config.Instance.Sites,
new ParallelOptions { MaxDegreeOfParallelism = Config.Instance.Sites.Length },
async (site, _) =>
{
sitesRobotFile[site.Host] = await robotsFileParser.FromUriAsync(new Uri($"http://{site.Host}/robots.txt"));
// Best-effort fetch of this site's robots.txt (requested over plain http). On success the
// parsed result is stored in sitesRobotFile, keyed by the site's host name.
try
{
sitesRobotFile[site.Host] = await robotsFileParser.FromUriAsync(new Uri($"http://{site.Host}/robots.txt"));
}
// Bare catch: every exception raised by the fetch is swallowed on purpose, so a single
// failing site only produces a console message instead of propagating out of this callback.
// When this path is taken, no sitesRobotFile entry is written for the host here.
// NOTE(review): the exception details are discarded; presumably later code treats a
// missing entry as "skip this site" — confirm against the lookup site.
catch
{
Console.WriteLine($"Ignoring {site.Host} because had issues fetching its robots data (is the site down?)");
}
}
);
@ -88,6 +95,11 @@ await Parallel.ForEachAsync(sitesTags, new ParallelOptions{MaxDegreeOfParallelis
return;
}
}
else
{
Console.WriteLine($"Not scraping {url} because I couldn't fetch robots data.");
return;
}
HttpResponseMessage? response = null;
try