Addresses issue #26 - getting tags via the Mastodon API. (#36)

This commit is contained in:
Chris Wood 2023-02-14 12:30:01 +00:00 committed by GitHub
parent fbd47ae7cd
commit 6dcc1a0327
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 307 additions and 166 deletions

4
.gitignore vendored
View File

@ -8,4 +8,6 @@ launchSettings.json
.DS_Store
data/
output/
*.DotSettings.user
*.DotSettings.user
config.json
imported.txt

View File

@ -14,7 +14,7 @@ The `FakeRelayApiKey` on the `config.json` is optional. If you don't provide one
This `config.json` pulls two tags from two instances:
```
```json
{
"FakeRelayUrl": "https://fakerelay.gervas.io",
"FakeRelayApiKey": "1TxL6m1Esx6tnv4EPxscvAmdQN7qSn0nKeyoM7LD8b9mz+GNfrKaHiWgiT3QcNMUA+dWLyWD8qyl1MuKJ+4uHA==",
@ -23,11 +23,11 @@ This `config.json` pulls two tags from two instances:
}
```
### Downloading all the followed hashtags of your instance
### Downloading all the followed hashtags of your instance using the database
You can pass `MastodonPostgresConnectionString` with a connection string to your postgres database and GetMoarFediverse will download content for all the hashtags the users on your server follow. Here's an example:
```
```json
{
"FakeRelayUrl": "https://fakerelay.gervas.io",
"FakeRelayApiKey": "1TxL6m1Esx6tnv4EPxscvAmdQN7qSn0nKeyoM7LD8b9m+GNfrKaHiWgiT3QcNMUA+dWLyWD8qyl1MuKJ+4uHA==",
@ -40,7 +40,7 @@ You can pass `MastodonPostgresConnectionString` with a connection string to your
If you add `"PinnedTags": true`, you can also populate the hashtags pinned by your users :) thanks [@nberlee](https://github.com/nberlee), this is great!
```
```json
{
"FakeRelayUrl": "https://fakerelay.gervas.io",
"FakeRelayApiKey": "1TxL6m1Esx6tnv4EPxscvAmdQN7qSn0nKeyoM7LD8b9m+GNfrKaHiWgiT3QcNMUA+dWLyWD8qyl1MuKJ+4uHA==",
@ -50,6 +50,33 @@ If you add `"PinnedTags": true`, you can also populate the hashtags pinned by yo
}
```
### Downloading the hashtags followed by users via the API
You can pass an `Api` object and GetMoarFediverse will download content for all the hashtags followed by each user for whom an access token is provided. Here's an example:
```json
{
"FakeRelayUrl": "https://foo.example",
"FakeRelayApiKey": "blah==",
"Api": {
"Url": "https://mastodon.example/api/",
"Tokens": [
{
"Owner": "Chris",
"Token": "1413D6izFoQdu0x00000DZ9ufcBvhOt7hoxuctHg2c"
}
]
},
"Instances": [ "hachyderm.io", "mastodon.social" ]
}
```
For the `Tokens` array items, both `Owner` and `Token` are required fields. `Owner` can be any non-empty string that identifies the owner of the token (e.g. the Mastodon username, an app client ID, etc.). This data structure allows multiple user accounts to be supported.
To create an access token for the config file, visit the web interface of your Mastodon instance and go to `/settings/applications` (Settings > Development). The token only requires `read:follows` scope. Then copy the access token shown at the top of the screen.
> If a database connection string is also provided via `MastodonPostgresConnectionString`, the tags will be retrieved via the database and any API-related settings will be ignored.
## How can I run it?
There are many ways for you to run GetMoarFediverse:

View File

@ -1,119 +0,0 @@
using System.Collections.Immutable;
using System.Text.Json;
namespace GetMoarFediverse;
/// <summary>
/// Read-only application settings, loaded once from the JSON config file and exposed
/// through the static <see cref="Instance"/> property.
/// </summary>
public class Config
{
    /// <summary>The loaded settings, or <c>null</c> until <see cref="Init"/> has completed.</summary>
    public static Config? Instance { get; private set; }

    public string ImportedPath { get; }
    public string FakeRelayUrl { get; }
    public string FakeRelayApiKey { get; }
    public string? MastodonPostgresConnectionString { get; }
    public bool PinnedTags { get; }
    public ImmutableArray<string> Tags { get; }
    public ImmutableArray<SiteData> Sites { get; }

    private Config(string importedPath, string fakeRelayUrl, string fakeRelayApiKey, string? mastodonPostgresConnectionString,
        bool pinnedTags, ImmutableArray<string> tags, ImmutableArray<SiteData> sites)
    {
        Sites = sites;
        Tags = tags;
        PinnedTags = pinnedTags;
        MastodonPostgresConnectionString = mastodonPostgresConnectionString;
        FakeRelayApiKey = fakeRelayApiKey;
        FakeRelayUrl = fakeRelayUrl;
        ImportedPath = importedPath;
    }

    /// <summary>
    /// Reads and validates the config file at <paramref name="path"/> and stores the result in
    /// <see cref="Instance"/>. Does nothing when <see cref="Instance"/> is already set.
    /// </summary>
    /// <param name="path">Path of the JSON config file; <c>imported.txt</c> is placed in the same directory.</param>
    /// <exception cref="Exception">
    /// The file could not be deserialized, the API key or FakeRelayUrl is missing, or both
    /// MastodonPostgresConnectionString and Tags were supplied.
    /// </exception>
    public static void Init(string path)
    {
        if (Instance is not null)
        {
            return;
        }

        var parsed = JsonSerializer.Deserialize(File.ReadAllText(path), JsonContext.Default.ConfigData)
                     ?? throw new Exception("Could not deserialize the config file");

        var importedFile = Path.Join(Path.GetDirectoryName(path), "imported.txt");

        // The key in the file wins; the environment variable is only consulted when the file has none.
        string? relayApiKey;
        if (string.IsNullOrEmpty(parsed.FakeRelayApiKey))
        {
            relayApiKey = Environment.GetEnvironmentVariable("FAKERELAY_APIKEY");
        }
        else
        {
            relayApiKey = parsed.FakeRelayApiKey;
        }

        if (relayApiKey is null)
        {
            throw new Exception("The api key is missing");
        }

        if (parsed.Sites is { Length: > 0 })
        {
            var banner = new[]
            {
                "|============================================================|",
                "| Warning: Sites is deprecated, please use Instances instead |",
                "|============================================================|\n"
            };
            foreach (var bannerLine in banner)
            {
                Console.WriteLine(bannerLine);
            }
        }

        var configuredTags = parsed.Tags ?? Array.Empty<string>();
        if (parsed.MastodonPostgresConnectionString.HasValue() && configuredTags.Length > 0)
        {
            throw new Exception("You can't specify both MastodonPostgresConnectionString and Tags");
        }

        if (parsed.FakeRelayUrl.IsNullOrEmpty())
        {
            throw new Exception("Missing FakeRelayUrl");
        }

        Instance = new Config(
            importedFile,
            parsed.FakeRelayUrl,
            relayApiKey,
            parsed.MastodonPostgresConnectionString,
            parsed.PinnedTags,
            configuredTags.ToImmutableArray(),
            parsed.GetImmutableSites());
    }

    /// <summary>Shape of the JSON config file as it is deserialized, before validation.</summary>
    public class ConfigData
    {
        public string? FakeRelayUrl { get; set; }
        public string? FakeRelayApiKey { get; set; }
        public string? MastodonPostgresConnectionString { get; set; }
        public bool PinnedTags { get; set; }
        public string[]? Instances { get; set; }
        public string[]? Tags { get; set; }
        public InternalSiteData[]? Sites { get; set; }

        /// <summary>
        /// Builds the list of sites to pull from. <see cref="Instances"/> takes precedence over
        /// <see cref="Sites"/>; when neither is present the result is empty.
        /// </summary>
        public ImmutableArray<SiteData> GetImmutableSites()
        {
            // the plan is to stop supporting Sites in favor of Instances. SiteSpecificTags add complexity and
            // don't make sense when pulling tags from Mastodon. Also, pulling is fast and multi threaded!
            if (Instances is not null)
            {
                return Instances
                    .Select(host => new SiteData(host, ImmutableArray<string>.Empty))
                    .ToImmutableArray();
            }

            if (Sites is null)
            {
                return ImmutableArray<SiteData>.Empty;
            }

            return Sites
                .Select(site => site.ToSiteData())
                .ToImmutableArray();
        }

        /// <summary>One entry of the deprecated <c>Sites</c> array.</summary>
        public class InternalSiteData
        {
            public InternalSiteData(string host, string[]? siteSpecificTags)
            {
                SiteSpecificTags = siteSpecificTags;
                Host = host;
            }

            public string Host { get; }
            public string[]? SiteSpecificTags { get; }

            /// <summary>Converts this entry to its immutable form; a missing tag list becomes an empty one.</summary>
            public SiteData ToSiteData()
            {
                var immutableTags = SiteSpecificTags is null
                    ? ImmutableArray<string>.Empty
                    : SiteSpecificTags.ToImmutableArray();
                return new SiteData(Host, immutableTags);
            }
        }
    }

    public record SiteData(string Host, ImmutableArray<string> SiteSpecificTags);
}

View File

@ -0,0 +1,20 @@
using System.Collections.Immutable;
namespace GetMoarFediverse.Configuration;
/// <summary>
/// Validated, immutable application settings. Instances are built by
/// <c>UnsafeConfig.ToConfig</c> from the JSON config file.
/// </summary>
/// <param name="ImportedPath">Path of the <c>imported.txt</c> file, located next to the config file.</param>
/// <param name="FakeRelayUrl">Base URL of the FakeRelay instance.</param>
/// <param name="FakeRelayApiKey">API key sent to FakeRelay as a bearer token.</param>
/// <param name="MastodonPostgresConnectionString">Optional connection string for reading followed/pinned tags from the Mastodon database.</param>
/// <param name="Api">Optional Mastodon API settings for reading followed tags with access tokens.</param>
/// <param name="PinnedTags">Whether pinned tags should also be pulled; only consulted when a database connection string is set.</param>
/// <param name="Tags">Tags listed directly in the config file.</param>
/// <param name="Sites">Instances to pull tagged posts from.</param>
public record Config(
string ImportedPath,
string FakeRelayUrl,
string FakeRelayApiKey,
string? MastodonPostgresConnectionString,
MastodonApi? Api,
bool PinnedTags,
ImmutableArray<string> Tags,
ImmutableArray<SiteData> Sites
);
/// <summary>Mastodon API settings: the API base URL and the access tokens whose followed tags are fetched.</summary>
/// <param name="Url">API base URL; request paths such as <c>v1/followed_tags</c> are appended directly to it.</param>
/// <param name="Tokens">One access token per user account to query.</param>
public record MastodonApi(string Url, ImmutableArray<MastodonApiAccessToken> Tokens);
/// <summary>An access token for the Mastodon API together with a label identifying whose token it is.</summary>
/// <param name="Owner">Free-form label for the token's owner; used in error messages.</param>
/// <param name="Token">The access token, sent as a bearer credential.</param>
public record MastodonApiAccessToken(string Owner, string Token);
/// <summary>An instance to pull posts from, with any tags that apply to that instance only.</summary>
/// <param name="Host">Host name of the instance.</param>
/// <param name="SiteSpecificTags">Tags pulled only from this instance; empty when the instance came from <c>Instances</c>.</param>
public record SiteData(string Host, ImmutableArray<string> SiteSpecificTags);

View File

@ -0,0 +1,13 @@
using GetMoarFediverse.Configuration.Unsafe;
namespace GetMoarFediverse.Configuration;
/// <summary>
/// Static holder that gives the rest of the application access to the loaded configuration.
/// </summary>
public static class Context
{
    /// <summary>
    /// The configuration produced by the most recent <see cref="Load"/> call.
    /// It is <c>null</c> at runtime until <see cref="Load"/> has been called.
    /// </summary>
    public static Config Configuration { get; private set; } = null!;

    /// <summary>Reads the config file at <paramref name="filename"/> and publishes the result.</summary>
    /// <param name="filename">Path of the JSON config file.</param>
    public static void Load(string filename) => Configuration = UnsafeConfig.ToConfig(filename);
}

View File

@ -0,0 +1,80 @@
using System.Collections.Immutable;
using System.Text.Json;
namespace GetMoarFediverse.Configuration.Unsafe;
/// <summary>
/// Nullable, mutable mirror of the JSON config file. It is the deserialization target;
/// <see cref="ToConfig"/> validates it and converts it to the immutable <see cref="Config"/>.
/// </summary>
public class UnsafeConfig
{
    public string? ImportedPath { get; set; }
    public string? FakeRelayUrl { get; set; }
    public string? FakeRelayApiKey { get; set; }
    public string? MastodonPostgresConnectionString { get; set; }
    public UnsafeMastodonApi? Api { get; set; }
    public bool PinnedTags { get; set; }
    public string[]? Instances { get; set; }
    public string[]? Tags { get; set; }
    public UnsafeSiteData[]? Sites { get; set; }

    /// <summary>
    /// Reads the config file at <paramref name="path"/>, validates it and returns the immutable configuration.
    /// </summary>
    /// <param name="path">Path of the JSON config file; <c>imported.txt</c> is placed in the same directory.</param>
    /// <returns>The validated configuration.</returns>
    /// <exception cref="Exception">
    /// The file could not be deserialized, the API key or FakeRelayUrl is missing, Tags were combined
    /// with a database connection string or Api section, or the Api section is invalid.
    /// </exception>
    public static Config ToConfig(string path)
    {
        var data = JsonSerializer.Deserialize(File.ReadAllText(path), JsonContext.Default.UnsafeConfig);
        if (data == null)
        {
            throw new Exception("Could not deserialize the config file");
        }

        // Always derived from the config file location; any value present in the file is overwritten.
        data.ImportedPath = Path.Join(Path.GetDirectoryName(path), "imported.txt");

        // Treat an empty key in the file the same as a missing one, so that
        // "FakeRelayApiKey": "" still falls back to the environment variable
        // instead of being sent to FakeRelay as an empty bearer token.
        if (string.IsNullOrEmpty(data.FakeRelayApiKey))
        {
            data.FakeRelayApiKey = Environment.GetEnvironmentVariable("FAKERELAY_APIKEY");
        }

        if (data.FakeRelayApiKey == null)
        {
            throw new Exception("The api key is missing");
        }

        if (data.Sites is { Length: > 0 })
        {
            Console.WriteLine("|============================================================|");
            Console.WriteLine("| Warning: Sites is deprecated, please use Instances instead |");
            Console.WriteLine("|============================================================|\n");
        }

        data.Tags ??= Array.Empty<string>();

        // Tags come either from Mastodon (database or API) or from the config file, never both.
        if ((data.MastodonPostgresConnectionString.HasValue() || data.Api != null) && data.Tags.Length > 0)
        {
            throw new Exception("You can't specify both MastodonPostgresConnectionString / API and Tags");
        }

        if (data.FakeRelayUrl.IsNullOrEmpty())
        {
            throw new Exception("Missing FakeRelayUrl");
        }

        return new Config(
            data.ImportedPath!,
            data.FakeRelayUrl!,
            data.FakeRelayApiKey!,
            data.MastodonPostgresConnectionString,
            data.Api?.ToMastodonApi(),
            data.PinnedTags,
            data.Tags.ToImmutableArray(),
            data.GetImmutableSites());
    }

    /// <summary>
    /// Builds the list of sites to pull from. <see cref="Instances"/> takes precedence over
    /// <see cref="Sites"/>; when neither is present the result is empty.
    /// </summary>
    private ImmutableArray<SiteData> GetImmutableSites()
    {
        // the plan is to stop supporting Sites in favor of Instances. SiteSpecificTags add complexity and
        // don't make sense when pulling tags from Mastodon. Also, pulling is fast and multi threaded!
        if (Instances != null)
        {
            return Instances
                .Select(i => new SiteData(i, ImmutableArray<string>.Empty))
                .ToImmutableArray();
        }

        return Sites == null
            ? ImmutableArray<SiteData>.Empty
            : Sites.Select(s => s.ToSiteData())
                .ToImmutableArray();
    }
}

View File

@ -0,0 +1,23 @@
using System.Collections.Immutable;
namespace GetMoarFediverse.Configuration.Unsafe;
/// <summary>
/// Nullable, mutable mirror of the <c>Api</c> section of the config file.
/// <see cref="ToMastodonApi"/> validates it and converts it to the immutable <see cref="MastodonApi"/>.
/// </summary>
public class UnsafeMastodonApi
{
    public string? Url { get; set; }
    public UnsafeMastodonApiAccessToken[]? Tokens { get; set; }

    /// <summary>Validates the section and returns its immutable form.</summary>
    /// <returns>The API settings; a missing <see cref="Tokens"/> array becomes an empty token list.</returns>
    /// <exception cref="Exception">
    /// <see cref="Url"/> is missing or does not end with <c>/api/</c>, or one of the tokens is incomplete.
    /// </exception>
    public MastodonApi ToMastodonApi()
    {
        if (Url.IsNullOrEmpty())
        {
            throw new Exception("A valid Url must be provided for the Api");
        }

        // Ordinal comparison: this is a URL suffix check, not linguistic text, so the
        // result must not depend on the current culture's collation rules.
        if (!Url!.EndsWith("/api/", StringComparison.Ordinal))
        {
            throw new Exception("The Url must end with /api/");
        }

        return new MastodonApi(Url!,
            Tokens == null
                ? ImmutableArray<MastodonApiAccessToken>.Empty
                : Tokens.Select(t => t.ToAccessToken()).ToImmutableArray());
    }
}

View File

@ -0,0 +1,15 @@
namespace GetMoarFediverse.Configuration.Unsafe;
/// <summary>
/// Nullable, mutable mirror of one entry in the <c>Tokens</c> array of the config file.
/// </summary>
public class UnsafeMastodonApiAccessToken
{
    public string? Owner { get; set; }
    public string? Token { get; set; }

    /// <summary>Validates the entry and returns its immutable form.</summary>
    /// <exception cref="Exception"><see cref="Owner"/> or <see cref="Token"/> is null or empty.</exception>
    public MastodonApiAccessToken ToAccessToken()
    {
        var hasBothFields = !Owner.IsNullOrEmpty() && !Token.IsNullOrEmpty();
        if (!hasBothFields)
        {
            throw new Exception("An Owner and Token must both be specified for an API Access Token.");
        }

        return new MastodonApiAccessToken(Owner!, Token!);
    }
}

View File

@ -0,0 +1,17 @@
using System.Collections.Immutable;
namespace GetMoarFediverse.Configuration.Unsafe;
/// <summary>
/// One entry of the deprecated <c>Sites</c> array of the config file, as deserialized.
/// </summary>
public class UnsafeSiteData
{
    public UnsafeSiteData(string host, string[]? siteSpecificTags)
        => (Host, SiteSpecificTags) = (host, siteSpecificTags);

    public string Host { get; }
    public string[]? SiteSpecificTags { get; }

    /// <summary>Converts this entry to its immutable form; a missing tag list becomes an empty one.</summary>
    public SiteData ToSiteData()
    {
        var immutableTags = SiteSpecificTags is null
            ? ImmutableArray<string>.Empty
            : SiteSpecificTags.ToImmutableArray();
        return new SiteData(Host, immutableTags);
    }
}

View File

@ -1,13 +1,16 @@
using System.Text.Json.Serialization;
using GetMoarFediverse.Configuration.Unsafe;
using GetMoarFediverse.Responses;
namespace GetMoarFediverse;
[JsonSerializable(typeof(Config.ConfigData))]
[JsonSerializable(typeof(UnsafeConfig))]
internal partial class JsonContext : JsonSerializerContext
{
}
[JsonSerializable(typeof(Status[]))]
[JsonSerializable(typeof(StatusResponse[]))]
[JsonSerializable(typeof(FollowedTag[]))]
[JsonSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase)]
internal partial class CamelCaseJsonContext : JsonSerializerContext
{

View File

@ -1,5 +1,7 @@
using System.Net.Http.Headers;
using Npgsql;
using System.Text.Json;
using GetMoarFediverse.Configuration;
namespace GetMoarFediverse;
@ -7,13 +9,56 @@ public static class MastodonConnectionHelper
{
public static async Task<List<string>> GetFollowedTagsAsync()
{
if (Config.Instance == null) throw new Exception("Config object is not initialized");
if (Config.Instance.MastodonPostgresConnectionString.IsNullOrEmpty())
{
throw new Exception("Missing mastodon postgres connection string");
}
if (Context.Configuration == null) throw new Exception("Config object is not initialized");
await using var conn = new NpgsqlConnection(Config.Instance.MastodonPostgresConnectionString);
if (!Context.Configuration.MastodonPostgresConnectionString.IsNullOrEmpty())
return await GetFollowedTagsDatabaseAsync();
if (Context.Configuration.Api != null)
return await GetFollowedTagsApiAsync();
return new List<string>();
}
/// <summary>
/// Fetches the followed tags of every configured access token from the Mastodon API
/// (<c>v1/followed_tags</c>) and returns the de-duplicated, sorted tag names.
/// </summary>
/// <returns>Distinct tag names across all tokens, in ascending order.</returns>
/// <exception cref="Exception">The configuration is not loaded, or a response could not be deserialized.</exception>
/// <exception cref="HttpRequestException">A request failed or returned a non-success status code.</exception>
private static async Task<List<string>> GetFollowedTagsApiAsync()
{
    if (Context.Configuration == null) throw new Exception("Config object is not initialized");

    // The caller only takes this path when an Api section is configured.
    var api = Context.Configuration.Api!;

    // The client, each request and each response own network resources; dispose them
    // deterministically instead of leaving them to the finalizer.
    using var client = new HttpClient();
    client.DefaultRequestHeaders.Add("User-Agent", "GetMoarFediverse");

    var tags = new List<string>();
    foreach (var token in api.Tokens)
    {
        // NOTE(review): only a single response is read per token. If the endpoint paginates its
        // results, accounts following many tags may be truncated — confirm against the API docs.
        using var request = new HttpRequestMessage(HttpMethod.Get, $"{api.Url}v1/followed_tags")
        {
            Headers = { Authorization = new AuthenticationHeaderValue("Bearer", token.Token) }
        };

        using var response = await client.SendAsync(request);
        response.EnsureSuccessStatusCode();

        var data = JsonSerializer.Deserialize(await response.Content.ReadAsStringAsync(),
            CamelCaseJsonContext.Default.FollowedTagArray);
        if (data == null)
        {
            throw new Exception($"Error deserializing the followed tags response for {token.Owner}");
        }

        tags.AddRange(data.Select(t => t.Name));
    }

    // Several users may follow the same tag; return each tag once.
    return tags.Distinct().OrderBy(t => t).ToList();
}
private static async Task<List<string>> GetFollowedTagsDatabaseAsync()
{
if (Context.Configuration == null) throw new Exception("Config object is not initialized");
await using var conn = new NpgsqlConnection(Context.Configuration.MastodonPostgresConnectionString);
await conn.OpenAsync();
var res = new List<string>();
@ -27,13 +72,13 @@ public static class MastodonConnectionHelper
public static async Task<List<string>> GetPinnedTagsAsync()
{
if (Config.Instance == null) throw new Exception("Config object is not initialized");
if (Config.Instance.MastodonPostgresConnectionString.IsNullOrEmpty())
if (Context.Configuration == null) throw new Exception("Config object is not initialized");
if (Context.Configuration.MastodonPostgresConnectionString.IsNullOrEmpty())
{
throw new Exception("Missing mastodon postgres connection string");
}
await using var conn = new NpgsqlConnection(Config.Instance.MastodonPostgresConnectionString);
await using var conn = new NpgsqlConnection(Context.Configuration.MastodonPostgresConnectionString);
await conn.OpenAsync();
var res = new List<string>();
@ -64,7 +109,7 @@ ORDER BY col->'params'->>'id' ASC", conn);
}
}
return res;
return res.Distinct().ToList();
}
}

View File

@ -1,6 +1,7 @@
using System.Collections.Concurrent;
using System.Text.Json;
using GetMoarFediverse;
using GetMoarFediverse.Configuration;
using TurnerSoftware.RobotsExclusionTools;
var configPath = Environment.GetEnvironmentVariable("CONFIG_PATH");
@ -13,23 +14,18 @@ if (configPath.IsNullOrEmpty())
throw new Exception("Missing config path");
}
Config.Init(configPath);
if (Config.Instance == null)
{
throw new Exception("Error initializing config object");
}
Context.Load(configPath);
var client = new HttpClient();
client.DefaultRequestHeaders.Add("User-Agent", "GetMoarFediverse");
var authClient = new HttpClient
{
BaseAddress = new Uri(Config.Instance.FakeRelayUrl)
BaseAddress = new Uri(Context.Configuration.FakeRelayUrl)
};
authClient.DefaultRequestHeaders.Add("Authorization", "Bearer " + Config.Instance.FakeRelayApiKey);
authClient.DefaultRequestHeaders.Add("Authorization", "Bearer " + Context.Configuration.FakeRelayApiKey);
var importedPath = Config.Instance.ImportedPath;
var importedPath = Context.Configuration.ImportedPath;
if (!File.Exists(importedPath))
{
File.WriteAllText(importedPath, "");
@ -37,8 +33,8 @@ if (!File.Exists(importedPath))
var robotsFileParser = new RobotsFileParser();
var sitesRobotFile = new ConcurrentDictionary<string, RobotsFile>();
await Parallel.ForEachAsync(Config.Instance.Sites,
new ParallelOptions { MaxDegreeOfParallelism = Config.Instance.Sites.Length },
await Parallel.ForEachAsync(Context.Configuration.Sites,
new ParallelOptions { MaxDegreeOfParallelism = Context.Configuration.Sites.Length },
async (site, _) =>
{
try
@ -54,25 +50,36 @@ await Parallel.ForEachAsync(Config.Instance.Sites,
List<(string host, string tag)> sitesTags;
int numberOfTags;
if (Config.Instance.MastodonPostgresConnectionString.HasValue())
var tags = new List<string>();
if (Context.Configuration.MastodonPostgresConnectionString.HasValue() || Context.Configuration.Api != null)
{
var tags = await MastodonConnectionHelper.GetFollowedTagsAsync();
if (Config.Instance.PinnedTags)
tags.AddRange(await MastodonConnectionHelper.GetFollowedTagsAsync());
}
if (Context.Configuration.MastodonPostgresConnectionString.HasValue())
{
if (Context.Configuration.PinnedTags)
{
tags = tags.Concat(await MastodonConnectionHelper.GetPinnedTagsAsync()).Distinct().ToList();
}
}
if (tags.Any())
{
numberOfTags = tags.Count;
sitesTags = Config.Instance.Sites
sitesTags = Context.Configuration.Sites
.SelectMany(s => tags.Select(t => (s.Host, t)))
.OrderBy(e => e.t)
.ToList();
}
else
{
numberOfTags = Config.Instance.Tags.Length;
sitesTags = Config.Instance.Sites
.SelectMany(s => Config.Instance.Tags.Select(tag => (s.Host, tag)))
.Concat(Config.Instance.Sites.SelectMany(s => s.SiteSpecificTags.Select(tag => (s.Host, tag))))
numberOfTags = Context.Configuration.Tags.Length;
sitesTags = Context.Configuration.Sites
.SelectMany(s => Context.Configuration.Tags.Select(tag => (s.Host, tag)))
.Concat(Context.Configuration.Sites.SelectMany(s => s.SiteSpecificTags.Select(tag => (s.Host, tag))))
.OrderBy(t => t.tag)
.ToList();
}
@ -115,7 +122,7 @@ await Parallel.ForEachAsync(sitesTags, new ParallelOptions{MaxDegreeOfParallelis
var json = await response.Content.ReadAsStringAsync();
var data = JsonSerializer.Deserialize(json, CamelCaseJsonContext.Default.StatusArray);
var data = JsonSerializer.Deserialize(json, CamelCaseJsonContext.Default.StatusResponseArray);
if (data == null)
{
Console.WriteLine($"Error deserializing the response when pulling #{tag} posts from {site}");
@ -164,13 +171,3 @@ if (importedList.Count > maxFileLines)
}
File.WriteAllLines(importedPath, importedList);
/// <summary>
/// A post as returned when pulling tagged posts from an instance; only its URI is read.
/// </summary>
public class Status
{
    public Status(string uri) => Uri = uri;

    /// <summary>URI of the post.</summary>
    public string Uri { get; }
}

View File

@ -0,0 +1,7 @@
namespace GetMoarFediverse.Responses;
/// <summary>
/// One element of the followed-tags API response. Only the tag name is read from it.
/// </summary>
public class FollowedTag
{
// Other properties in the returned JSON are ignored.
// Initialised with null! because the value is expected to be set during deserialization.
public string Name { get; set; } = null!;
}

View File

@ -0,0 +1,11 @@
namespace GetMoarFediverse.Responses;
/// <summary>
/// A post as returned when pulling tagged posts from an instance; only its URI is read.
/// </summary>
public class StatusResponse
{
    public StatusResponse(string uri) => Uri = uri;

    /// <summary>URI of the post.</summary>
    public string Uri { get; }
}