Make it possible to scrape followed tags from the instance (#6)

This commit is contained in:
Gervasio Marchand 2022-12-17 17:27:04 -08:00 committed by GitHub
parent 79177f8bf9
commit 4ccfaad15f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 57 additions and 9 deletions

View File

@ -10,15 +10,18 @@ public class Config
public string ImportedPath { get; }
public string FakeRelayUrl { get; }
public string FakeRelayApiKey { get; }
public string? MastodonPostgresConnectionString { get; }
public ImmutableArray<string> Tags { get; }
public ImmutableArray<SiteData> Sites { get; }
private Config(string importedPath, string fakeRelayUrl, string fakeRelayApiKey, ImmutableArray<string> tags, ImmutableArray<SiteData> sites)
/// <summary>
/// Creates the configuration object by copying each argument into the
/// read-only property of the same name.
/// </summary>
/// <param name="importedPath">Value stored in <see cref="ImportedPath"/>.</param>
/// <param name="fakeRelayUrl">Value stored in <see cref="FakeRelayUrl"/>.</param>
/// <param name="fakeRelayApiKey">Value stored in <see cref="FakeRelayApiKey"/>.</param>
/// <param name="mastodonPostgresConnectionString">
/// Nullable connection string stored in <see cref="MastodonPostgresConnectionString"/>;
/// it is not validated here.
/// </param>
/// <param name="tags">Value stored in <see cref="Tags"/>.</param>
/// <param name="sites">Value stored in <see cref="Sites"/>.</param>
private Config(string importedPath, string fakeRelayUrl, string fakeRelayApiKey, string? mastodonPostgresConnectionString,
ImmutableArray<string> tags, ImmutableArray<SiteData> sites)
{
ImportedPath = importedPath;
FakeRelayUrl = fakeRelayUrl;
FakeRelayApiKey = fakeRelayApiKey;
// May be null: the property is declared as string?.
MastodonPostgresConnectionString = mastodonPostgresConnectionString;
Tags = tags;
Sites = sites;
}
@ -30,20 +33,27 @@ public class Config
return;
}
var data = JsonSerializer.Deserialize<ConfigData>(File.ReadAllText(path), JsonContext.Default.ConfigData);
var data = JsonSerializer.Deserialize(File.ReadAllText(path), JsonContext.Default.ConfigData);
var importedPath = Path.Join(Path.GetDirectoryName(path), "imported.txt");
var apiKey = string.IsNullOrEmpty(data.FakeRelayApiKey)
? Environment.GetEnvironmentVariable("FAKERELAY_APIKEY")
: data.FakeRelayApiKey;
Instance = new Config(importedPath, data.FakeRelayUrl, apiKey, data.Tags.ToImmutableArray(), data.ImmutableSites);
if (apiKey == null)
{
throw new Exception("The api key is missing");
}
Instance = new Config(importedPath, data.FakeRelayUrl, apiKey, data.MastodonPostgresConnectionString,
data.Tags.ToImmutableArray(), data.ImmutableSites);
}
public class ConfigData
{
public string FakeRelayUrl { get; set; }
public string? FakeRelayApiKey { get; set; }
public string? MastodonPostgresConnectionString { get; set; }
public string[] Tags { get; set; }
public InternalSiteData[]? Sites { get; set; }

View File

@ -7,4 +7,8 @@
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Npgsql" Version="7.0.1" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,23 @@
using Npgsql;
namespace GetMoarFediverse;
/// <summary>
/// Helper for querying the Postgres database identified by
/// <c>Config.Instance.MastodonPostgresConnectionString</c>.
/// </summary>
public static class MastodonConnectionHelper
{
    /// <summary>
    /// Runs a query that joins <c>tag_follows</c> with <c>tags</c> and collects
    /// the distinct tag names it returns.
    /// </summary>
    /// <returns>
    /// The tag names in the order produced by the query, which sorts them
    /// ascending by name.
    /// </returns>
    public static async Task<List<string>> GetFollowedTagsAsync()
    {
        const string query = "SELECT DISTINCT tags.name FROM tag_follows JOIN tags ON tag_id = tags.id ORDER BY tags.name ASC;";

        await using var connection = new NpgsqlConnection(Config.Instance.MastodonPostgresConnectionString);
        await connection.OpenAsync();

        // Declared in dependency order so disposal runs reader -> command -> connection.
        await using var command = new NpgsqlCommand(query, connection);
        await using var rows = await command.ExecuteReaderAsync();

        var followedTags = new List<string>();
        while (await rows.ReadAsync())
        {
            // Column 0 is tags.name, the only column selected.
            followedTags.Add(rows.GetString(0));
        }

        return followedTags;
    }
}

View File

@ -26,11 +26,22 @@ var importedList = File.ReadAllLines(importedPath).ToList();
var imported = importedList.ToHashSet();
var statusesToLoadBag = new ConcurrentBag<string>();
var sitesTags = Config.Instance.Sites
.SelectMany(s => Config.Instance.Tags.Select(tag => (s.Host, tag)))
.Concat(Config.Instance.Sites.SelectMany(s => s.SiteSpecificTags.Select(tag => (s.Host, tag))))
.OrderBy(t => t.tag)
.ToList();
// Build the list of (host, tag) pairs to process. Where the tags come from
// depends on whether a Mastodon Postgres connection string is configured.
List<(string host, string tag)> sitesTags;
if (string.IsNullOrEmpty(Config.Instance.MastodonPostgresConnectionString))
{
// No connection string: pair every site with every tag in Config.Instance.Tags,
// append each site's own SiteSpecificTags, then order the pairs by tag.
sitesTags = Config.Instance.Sites
.SelectMany(s => Config.Instance.Tags.Select(tag => (s.Host, tag)))
.Concat(Config.Instance.Sites.SelectMany(s => s.SiteSpecificTags.Select(tag => (s.Host, tag))))
.OrderBy(t => t.tag)
.ToList();
}
else
{
// Connection string present: pair every site with the tags returned by
// MastodonConnectionHelper.GetFollowedTagsAsync().
// NOTE(review): this branch uses neither Config.Instance.Tags nor SiteSpecificTags,
// and applies no OrderBy to the pairs - confirm that is intended.
var tags = await MastodonConnectionHelper.GetFollowedTagsAsync();
sitesTags = Config.Instance.Sites
.SelectMany(s => tags.Select(t => (s.Host, t)))
.ToList();
}
ParallelOptions parallelOptions = new()
{
@ -54,7 +65,7 @@ await Parallel.ForEachAsync(sitesTags, parallelOptions, async (st, _) =>
}
var json = await response.Content.ReadAsStringAsync();
var data = JsonSerializer.Deserialize<TagResponse>(json, CamelCaseJsonContext.Default.TagResponse);
var data = JsonSerializer.Deserialize(json, CamelCaseJsonContext.Default.TagResponse);
foreach (var statusLink in data.OrderedItems.Where(i=>!imported.Contains(i)))
{