From 91ac0e2385422835ca7346e008c2097690ef101e Mon Sep 17 00:00:00 2001
From: Gervasio Marchand
Date: Sat, 4 Feb 2023 18:29:44 -0300
Subject: [PATCH] If we can't pull the robots information, don't break completely (#37)

---
 src/Program.cs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/Program.cs b/src/Program.cs
index 29cacc1..4d6c5cd 100644
--- a/src/Program.cs
+++ b/src/Program.cs
@@ -41,7 +41,14 @@ await Parallel.ForEachAsync(Config.Instance.Sites,
     new ParallelOptions { MaxDegreeOfParallelism = Config.Instance.Sites.Length },
     async (site, _) =>
     {
-        sitesRobotFile[site.Host] = await robotsFileParser.FromUriAsync(new Uri($"http://{site.Host}/robots.txt"));
+        try
+        {
+            sitesRobotFile[site.Host] = await robotsFileParser.FromUriAsync(new Uri($"http://{site.Host}/robots.txt"));
+        }
+        catch
+        {
+            Console.WriteLine($"Ignoring {site.Host} because had issues fetching its robots data (is the site down?)");
+        }
     }
 );
 
@@ -88,6 +95,11 @@ await Parallel.ForEachAsync(sitesTags, new ParallelOptions{MaxDegreeOfParallelis
             return;
         }
     }
+    else
+    {
+        Console.WriteLine($"Not scraping {url} because I couldn't fetch robots data.");
+        return;
+    }
 
     HttpResponseMessage? response = null;
     try