diff --git a/.gitignore b/.gitignore index ca96dbb..226218b 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,6 @@ launchSettings.json .DS_Store data/ output/ -*.DotSettings.user \ No newline at end of file +*.DotSettings.user +config.json +imported.txt \ No newline at end of file diff --git a/README.md b/README.md index c0bd395..1d3d87d 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ The `FakeRelayApiKey` on the `config.json` is optional. If you don't provide one This `config.json` pulls two tags from two instances: -``` +```json { "FakeRelayUrl": "https://fakerelay.gervas.io", "FakeRelayApiKey": "1TxL6m1Esx6tnv4EPxscvAmdQN7qSn0nKeyoM7LD8b9mz+GNfrKaHiWgiT3QcNMUA+dWLyWD8qyl1MuKJ+4uHA==", @@ -23,11 +23,11 @@ This `config.json` pulls two tags from two instances: } ``` -### Downloading all the followed hashtags of your instance +### Downloading all the followed hashtags of your instance using the database You can pass `MastodonPostgresConnectionString` with a connection string to your postgres database and GetMoarFediverse will download content for all the hashtags the users on your server follow. Here's an example: -``` +```json { "FakeRelayUrl": "https://fakerelay.gervas.io", "FakeRelayApiKey": "1TxL6m1Esx6tnv4EPxscvAmdQN7qSn0nKeyoM7LD8b9m+GNfrKaHiWgiT3QcNMUA+dWLyWD8qyl1MuKJ+4uHA==", @@ -40,7 +40,7 @@ You can pass `MastodonPostgresConnectionString` with a connection string to your If you add `"PinnedTags": true`, you can also populate the hashtags pinned by your users :) thanks [@nberlee](https://github.com/nberlee), this is great! 
-``` +```json { "FakeRelayUrl": "https://fakerelay.gervas.io", "FakeRelayApiKey": "1TxL6m1Esx6tnv4EPxscvAmdQN7qSn0nKeyoM7LD8b9m+GNfrKaHiWgiT3QcNMUA+dWLyWD8qyl1MuKJ+4uHA==", @@ -50,6 +50,33 @@ If you add `"PinnedTags": true`, you can also populate the hashtags pinned by yo } ``` +### Downloading the hashtags followed by users via the API + +You can pass an `Api` object and GetMoarFediverse will download content for all the hashtags followed by each user for whom an access token is provided. Here's an example: + +```json +{ + "FakeRelayUrl": "https://foo.example", + "FakeRelayApiKey": "blah==", + "Api": { + "Url": "https://mastodon.example/api/", + "Tokens": [ + { + "Owner": "Chris", + "Token": "1413D6izFoQdu0x00000DZ9ufcBvhOt7hoxuctHg2c" + } + ] + }, + "Instances": [ "hachyderm.io", "mastodon.social" ] +} +``` + +For the `Tokens` array items, both `Owner` and `Token` are required fields. `Owner` can be any non-empty string that identifies the owner of the token (e.g. the Mastodon username, app client ID, etc.). This data structure allows multiple user accounts to be supported. + +To create an access token for the config file, visit the web interface of your Mastodon instance, go to `/settings/applications` (Settings > Development) and create a new application. The application only requires the `read:follows` scope. Then open the application and copy the access token shown at the top of the screen. + +> If a database connection string is also provided via `MastodonPostgresConnectionString`, the tags will be retrieved via the database and any API-related settings will be ignored. + ## How can I run it? There are many ways for you to run GetMoarFediverse: diff --git a/src/Config.cs b/src/Config.cs deleted file mode 100644 index 8df9ebe..0000000 --- a/src/Config.cs +++ /dev/null @@ -1,119 +0,0 @@ -using System.Collections.Immutable; -using System.Text.Json; - -namespace GetMoarFediverse; - -public class Config -{ - public static Config? 
Instance { get; private set; } - - public string ImportedPath { get; } - public string FakeRelayUrl { get; } - public string FakeRelayApiKey { get; } - public string? MastodonPostgresConnectionString { get; } - public bool PinnedTags { get; } - public ImmutableArray Tags { get; } - public ImmutableArray Sites { get; } - - - private Config(string importedPath, string fakeRelayUrl, string fakeRelayApiKey, string? mastodonPostgresConnectionString, - bool pinnedTags, ImmutableArray tags, ImmutableArray sites) - { - ImportedPath = importedPath; - FakeRelayUrl = fakeRelayUrl; - FakeRelayApiKey = fakeRelayApiKey; - MastodonPostgresConnectionString = mastodonPostgresConnectionString; - PinnedTags = pinnedTags; - Tags = tags; - Sites = sites; - } - - public static void Init(string path) - { - if (Instance != null) - { - return; - } - - var data = JsonSerializer.Deserialize(File.ReadAllText(path), JsonContext.Default.ConfigData); - if (data == null) - { - throw new Exception("Could not deserialize the config file"); - } - - var importedPath = Path.Join(Path.GetDirectoryName(path), "imported.txt"); - var apiKey = string.IsNullOrEmpty(data.FakeRelayApiKey) - ? 
Environment.GetEnvironmentVariable("FAKERELAY_APIKEY") - : data.FakeRelayApiKey; - - if (apiKey == null) - { - throw new Exception("The api key is missing"); - } - - if (data.Sites is { Length: > 0 }) - { - Console.WriteLine("|============================================================|"); - Console.WriteLine("| Warning: Sites is deprecated, please use Instances instead |"); - Console.WriteLine("|============================================================|\n"); - } - - data.Tags ??= Array.Empty(); - if (data.MastodonPostgresConnectionString.HasValue() && data.Tags.Length > 0) - { - throw new Exception("You can't specify both MastodonPostgresConnectionString and Tags"); - } - - if (data.FakeRelayUrl.IsNullOrEmpty()) - { - throw new Exception("Missing FakeRelayUrl"); - } - - Instance = new Config(importedPath, data.FakeRelayUrl, apiKey, data.MastodonPostgresConnectionString, - data.PinnedTags, data.Tags.ToImmutableArray(), data.GetImmutableSites()); - } - - public class ConfigData - { - public string? FakeRelayUrl { get; set; } - public string? FakeRelayApiKey { get; set; } - public string? MastodonPostgresConnectionString { get; set; } - public bool PinnedTags { get; set; } - public string[]? Instances { get; set; } - public string[]? Tags { get; set; } - public InternalSiteData[]? Sites { get; set; } - - public ImmutableArray GetImmutableSites() - { - // the plan is to stop supporting Sites in favor of Instances. SiteSpecificTags add complexity and - // don't make sense when pulling tags from Mastodon. Also, pulling is fast and multi threaded! - if (Instances != null) - { - return Instances - .Select(i => new SiteData(i, ImmutableArray.Empty)) - .ToImmutableArray(); - } - - return Sites == null - ? ImmutableArray.Empty - : Sites.Select(s => s.ToSiteData()) - .ToImmutableArray(); - } - - public class InternalSiteData - { - public InternalSiteData(string host, string[]? 
siteSpecificTags) - { - Host = host; - SiteSpecificTags = siteSpecificTags; - } - - public string Host { get; } - public string[]? SiteSpecificTags { get; } - public SiteData ToSiteData() => - new(Host, SiteSpecificTags?.ToImmutableArray() ?? ImmutableArray.Empty); - } - } - - public record SiteData(string Host, ImmutableArray SiteSpecificTags); -} diff --git a/src/Configuration/Config.cs b/src/Configuration/Config.cs new file mode 100644 index 0000000..f909cb1 --- /dev/null +++ b/src/Configuration/Config.cs @@ -0,0 +1,20 @@ +using System.Collections.Immutable; + +namespace GetMoarFediverse.Configuration; + +public record Config( + string ImportedPath, + string FakeRelayUrl, + string FakeRelayApiKey, + string? MastodonPostgresConnectionString, + MastodonApi? Api, + bool PinnedTags, + ImmutableArray Tags, + ImmutableArray Sites +); + +public record MastodonApi(string Url, ImmutableArray Tokens); + +public record MastodonApiAccessToken(string Owner, string Token); + +public record SiteData(string Host, ImmutableArray SiteSpecificTags); \ No newline at end of file diff --git a/src/Configuration/Context.cs b/src/Configuration/Context.cs new file mode 100644 index 0000000..9404c6c --- /dev/null +++ b/src/Configuration/Context.cs @@ -0,0 +1,13 @@ +using GetMoarFediverse.Configuration.Unsafe; + +namespace GetMoarFediverse.Configuration; + +public static class Context +{ + public static Config Configuration { get; private set; } = null!; + + public static void Load(string filename) + { + Configuration = UnsafeConfig.ToConfig(filename); + } +} \ No newline at end of file diff --git a/src/Configuration/Unsafe/UnsafeConfig.cs b/src/Configuration/Unsafe/UnsafeConfig.cs new file mode 100644 index 0000000..27f29ee --- /dev/null +++ b/src/Configuration/Unsafe/UnsafeConfig.cs @@ -0,0 +1,80 @@ +using System.Collections.Immutable; +using System.Text.Json; + +namespace GetMoarFediverse.Configuration.Unsafe; + +public class UnsafeConfig +{ + public string? 
ImportedPath { get; set; } + public string? FakeRelayUrl { get; set; } + public string? FakeRelayApiKey { get; set; } + public string? MastodonPostgresConnectionString { get; set; } + public UnsafeMastodonApi? Api { get; set; } + public bool PinnedTags { get; set; } + public string[]? Instances { get; set; } + public string[]? Tags { get; set; } + public UnsafeSiteData[]? Sites { get; set; } + + public static Config ToConfig(string path) + { + var data = JsonSerializer.Deserialize(File.ReadAllText(path), JsonContext.Default.UnsafeConfig); + if (data == null) + { + throw new Exception("Could not deserialize the config file"); + } + + data.ImportedPath = Path.Join(Path.GetDirectoryName(path), "imported.txt"); + data.FakeRelayApiKey ??= Environment.GetEnvironmentVariable("FAKERELAY_APIKEY"); + + if (data.FakeRelayApiKey == null) + { + throw new Exception("The api key is missing"); + } + + if (data.Sites is { Length: > 0 }) + { + Console.WriteLine("|============================================================|"); + Console.WriteLine("| Warning: Sites is deprecated, please use Instances instead |"); + Console.WriteLine("|============================================================|\n"); + } + + data.Tags ??= Array.Empty(); + + if ((data.MastodonPostgresConnectionString.HasValue() || data.Api != null) && data.Tags.Length > 0) + { + throw new Exception("You can't specify both MastodonPostgresConnectionString / API and Tags"); + } + + if (data.FakeRelayUrl.IsNullOrEmpty()) + { + throw new Exception("Missing FakeRelayUrl"); + } + + return new Config( + data.ImportedPath!, + data.FakeRelayUrl!, + data.FakeRelayApiKey!, + data.MastodonPostgresConnectionString, + data.Api?.ToMastodonApi(), + data.PinnedTags, + data.Tags?.ToImmutableArray() ?? ImmutableArray.Empty, + data.GetImmutableSites()); + } + + private ImmutableArray GetImmutableSites() + { + // the plan is to stop supporting Sites in favor of Instances. 
SiteSpecificTags add complexity and + // don't make sense when pulling tags from Mastodon. Also, pulling is fast and multi threaded! + if (Instances != null) + { + return Instances + .Select(i => new SiteData(i, ImmutableArray.Empty)) + .ToImmutableArray(); + } + + return Sites == null + ? ImmutableArray.Empty + : Sites.Select(s => s.ToSiteData()) + .ToImmutableArray(); + } +} \ No newline at end of file diff --git a/src/Configuration/Unsafe/UnsafeMastodonApi.cs b/src/Configuration/Unsafe/UnsafeMastodonApi.cs new file mode 100644 index 0000000..b8a552b --- /dev/null +++ b/src/Configuration/Unsafe/UnsafeMastodonApi.cs @@ -0,0 +1,23 @@ +using System.Collections.Immutable; + +namespace GetMoarFediverse.Configuration.Unsafe; + +public class UnsafeMastodonApi +{ + public string? Url { get; set; } + public UnsafeMastodonApiAccessToken[]? Tokens { get; set; } + + public MastodonApi ToMastodonApi() + { + if (Url.IsNullOrEmpty()) + throw new Exception("A valid Url must be provided for the Api"); + + if (!Url!.EndsWith("/api/")) + throw new Exception("The Url must end with /api/"); + + return new MastodonApi(Url!, + Tokens == null + ? ImmutableArray.Empty + : Tokens.Select(t => t.ToAccessToken()).ToImmutableArray()); + } +} \ No newline at end of file diff --git a/src/Configuration/Unsafe/UnsafeMastodonApiAccessToken.cs b/src/Configuration/Unsafe/UnsafeMastodonApiAccessToken.cs new file mode 100644 index 0000000..9c054d3 --- /dev/null +++ b/src/Configuration/Unsafe/UnsafeMastodonApiAccessToken.cs @@ -0,0 +1,15 @@ +namespace GetMoarFediverse.Configuration.Unsafe; + +public class UnsafeMastodonApiAccessToken +{ + public string? Owner { get; set; } + public string? 
Token { get; set; } + + public MastodonApiAccessToken ToAccessToken() + { + if (Owner.IsNullOrEmpty() || Token.IsNullOrEmpty()) + throw new Exception("An Owner and Token must both be specified for an API Access Token."); + + return new MastodonApiAccessToken(Owner!, Token!); + } +} \ No newline at end of file diff --git a/src/Configuration/Unsafe/UnsafeSiteData.cs b/src/Configuration/Unsafe/UnsafeSiteData.cs new file mode 100644 index 0000000..7f1b9ba --- /dev/null +++ b/src/Configuration/Unsafe/UnsafeSiteData.cs @@ -0,0 +1,17 @@ +using System.Collections.Immutable; + +namespace GetMoarFediverse.Configuration.Unsafe; + +public class UnsafeSiteData +{ + public UnsafeSiteData(string host, string[]? siteSpecificTags) + { + Host = host; + SiteSpecificTags = siteSpecificTags; + } + + public string Host { get; } + public string[]? SiteSpecificTags { get; } + public SiteData ToSiteData() => + new(Host, SiteSpecificTags?.ToImmutableArray() ?? ImmutableArray.Empty); +} \ No newline at end of file diff --git a/src/JsonContexts.cs b/src/JsonContexts.cs index 807653d..e3f76e1 100644 --- a/src/JsonContexts.cs +++ b/src/JsonContexts.cs @@ -1,13 +1,16 @@ using System.Text.Json.Serialization; +using GetMoarFediverse.Configuration.Unsafe; +using GetMoarFediverse.Responses; namespace GetMoarFediverse; -[JsonSerializable(typeof(Config.ConfigData))] +[JsonSerializable(typeof(UnsafeConfig))] internal partial class JsonContext : JsonSerializerContext { } -[JsonSerializable(typeof(Status[]))] +[JsonSerializable(typeof(StatusResponse[]))] +[JsonSerializable(typeof(FollowedTag[]))] [JsonSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase)] internal partial class CamelCaseJsonContext : JsonSerializerContext { diff --git a/src/MastodonConnectionHelper.cs b/src/MastodonConnectionHelper.cs index 1babebe..29c5a56 100644 --- a/src/MastodonConnectionHelper.cs +++ b/src/MastodonConnectionHelper.cs @@ -1,5 +1,7 @@ +using System.Net.Http.Headers; using Npgsql; using 
System.Text.Json; +using GetMoarFediverse.Configuration; namespace GetMoarFediverse; @@ -7,13 +9,56 @@ public static class MastodonConnectionHelper { public static async Task> GetFollowedTagsAsync() { - if (Config.Instance == null) throw new Exception("Config object is not initialized"); - if (Config.Instance.MastodonPostgresConnectionString.IsNullOrEmpty()) - { - throw new Exception("Missing mastodon postgres connection string"); - } + if (Context.Configuration == null) throw new Exception("Config object is not initialized"); - await using var conn = new NpgsqlConnection(Config.Instance.MastodonPostgresConnectionString); + if (!Context.Configuration.MastodonPostgresConnectionString.IsNullOrEmpty()) + return await GetFollowedTagsDatabaseAsync(); + + if (Context.Configuration.Api != null) + return await GetFollowedTagsApiAsync(); + + return new List(); + } + + private static async Task> GetFollowedTagsApiAsync() + { + if (Context.Configuration == null) throw new Exception("Config object is not initialized"); + var api = Context.Configuration.Api!; + + var client = new HttpClient(); + client.DefaultRequestHeaders.Add("User-Agent", "GetMoarFediverse"); + + var tags = new List(); + + foreach (var token in api.Tokens) + { + var request = new HttpRequestMessage(HttpMethod.Get, $"{api.Url}v1/followed_tags") + { + Headers = {Authorization = new AuthenticationHeaderValue("Bearer", token.Token)} + }; + + var response = await client.SendAsync(request); + response.EnsureSuccessStatusCode(); + + var data = JsonSerializer.Deserialize(await response.Content.ReadAsStringAsync(), + CamelCaseJsonContext.Default.FollowedTagArray); + + if (data == null) + { + throw new Exception($"Error deserializing the followed tags response for {token.Owner}"); + } + + tags.AddRange(data.Select(t => t.Name)); + } + + return tags.Distinct().OrderBy(t => t).ToList(); + } + + private static async Task> GetFollowedTagsDatabaseAsync() + { + if (Context.Configuration == null) throw new Exception("Config 
object is not initialized"); + + await using var conn = new NpgsqlConnection(Context.Configuration.MastodonPostgresConnectionString); await conn.OpenAsync(); var res = new List(); @@ -27,13 +72,13 @@ public static class MastodonConnectionHelper public static async Task> GetPinnedTagsAsync() { - if (Config.Instance == null) throw new Exception("Config object is not initialized"); - if (Config.Instance.MastodonPostgresConnectionString.IsNullOrEmpty()) + if (Context.Configuration == null) throw new Exception("Config object is not initialized"); + if (Context.Configuration.MastodonPostgresConnectionString.IsNullOrEmpty()) { throw new Exception("Missing mastodon postgres connection string"); } - await using var conn = new NpgsqlConnection(Config.Instance.MastodonPostgresConnectionString); + await using var conn = new NpgsqlConnection(Context.Configuration.MastodonPostgresConnectionString); await conn.OpenAsync(); var res = new List(); @@ -64,7 +109,7 @@ ORDER BY col->'params'->>'id' ASC", conn); } } - return res; + return res.Distinct().ToList(); } } diff --git a/src/Program.cs b/src/Program.cs index ef144d2..677b01b 100644 --- a/src/Program.cs +++ b/src/Program.cs @@ -1,6 +1,7 @@ using System.Collections.Concurrent; using System.Text.Json; using GetMoarFediverse; +using GetMoarFediverse.Configuration; using TurnerSoftware.RobotsExclusionTools; var configPath = Environment.GetEnvironmentVariable("CONFIG_PATH"); @@ -13,23 +14,18 @@ if (configPath.IsNullOrEmpty()) throw new Exception("Missing config path"); } -Config.Init(configPath); - -if (Config.Instance == null) -{ - throw new Exception("Error initializing config object"); -} +Context.Load(configPath); var client = new HttpClient(); client.DefaultRequestHeaders.Add("User-Agent", "GetMoarFediverse"); var authClient = new HttpClient { - BaseAddress = new Uri(Config.Instance.FakeRelayUrl) + BaseAddress = new Uri(Context.Configuration.FakeRelayUrl) }; -authClient.DefaultRequestHeaders.Add("Authorization", "Bearer " + 
Config.Instance.FakeRelayApiKey); +authClient.DefaultRequestHeaders.Add("Authorization", "Bearer " + Context.Configuration.FakeRelayApiKey); -var importedPath = Config.Instance.ImportedPath; +var importedPath = Context.Configuration.ImportedPath; if (!File.Exists(importedPath)) { File.WriteAllText(importedPath, ""); @@ -37,8 +33,8 @@ if (!File.Exists(importedPath)) var robotsFileParser = new RobotsFileParser(); var sitesRobotFile = new ConcurrentDictionary(); -await Parallel.ForEachAsync(Config.Instance.Sites, - new ParallelOptions { MaxDegreeOfParallelism = Config.Instance.Sites.Length }, +await Parallel.ForEachAsync(Context.Configuration.Sites, + new ParallelOptions { MaxDegreeOfParallelism = Context.Configuration.Sites.Length }, async (site, _) => { try @@ -54,25 +50,36 @@ await Parallel.ForEachAsync(Config.Instance.Sites, List<(string host, string tag)> sitesTags; int numberOfTags; -if (Config.Instance.MastodonPostgresConnectionString.HasValue()) + +var tags = new List(); + +if (Context.Configuration.MastodonPostgresConnectionString.HasValue() || Context.Configuration.Api != null) { - var tags = await MastodonConnectionHelper.GetFollowedTagsAsync(); - if (Config.Instance.PinnedTags) + tags.AddRange(await MastodonConnectionHelper.GetFollowedTagsAsync()); +} + +if (Context.Configuration.MastodonPostgresConnectionString.HasValue()) +{ + if (Context.Configuration.PinnedTags) { tags = tags.Concat(await MastodonConnectionHelper.GetPinnedTagsAsync()).Distinct().ToList(); } +} + +if (tags.Any()) +{ numberOfTags = tags.Count; - sitesTags = Config.Instance.Sites + sitesTags = Context.Configuration.Sites .SelectMany(s => tags.Select(t => (s.Host, t))) .OrderBy(e => e.t) .ToList(); } else { - numberOfTags = Config.Instance.Tags.Length; - sitesTags = Config.Instance.Sites - .SelectMany(s => Config.Instance.Tags.Select(tag => (s.Host, tag))) - .Concat(Config.Instance.Sites.SelectMany(s => s.SiteSpecificTags.Select(tag => (s.Host, tag)))) + numberOfTags = 
Context.Configuration.Tags.Length; + sitesTags = Context.Configuration.Sites + .SelectMany(s => Context.Configuration.Tags.Select(tag => (s.Host, tag))) + .Concat(Context.Configuration.Sites.SelectMany(s => s.SiteSpecificTags.Select(tag => (s.Host, tag)))) .OrderBy(t => t.tag) .ToList(); } @@ -115,7 +122,7 @@ await Parallel.ForEachAsync(sitesTags, new ParallelOptions{MaxDegreeOfParallelis var json = await response.Content.ReadAsStringAsync(); - var data = JsonSerializer.Deserialize(json, CamelCaseJsonContext.Default.StatusArray); + var data = JsonSerializer.Deserialize(json, CamelCaseJsonContext.Default.StatusResponseArray); if (data == null) { Console.WriteLine($"Error deserializing the response when pulling #{tag} posts from {site}"); @@ -164,13 +171,3 @@ if (importedList.Count > maxFileLines) } File.WriteAllLines(importedPath, importedList); - -public class Status -{ - public string Uri { get; } - - public Status(string uri) - { - Uri = uri; - } -} diff --git a/src/Responses/FollowedTagResponse.cs b/src/Responses/FollowedTagResponse.cs new file mode 100644 index 0000000..311c71c --- /dev/null +++ b/src/Responses/FollowedTagResponse.cs @@ -0,0 +1,7 @@ +namespace GetMoarFediverse.Responses; + +public class FollowedTag +{ + // Other properties in the returned JSON are ignored. + public string Name { get; set; } = null!; +} \ No newline at end of file diff --git a/src/Responses/StatusResponse.cs b/src/Responses/StatusResponse.cs new file mode 100644 index 0000000..1676be2 --- /dev/null +++ b/src/Responses/StatusResponse.cs @@ -0,0 +1,11 @@ +namespace GetMoarFediverse.Responses; + +public class StatusResponse +{ + public string Uri { get; } + + public StatusResponse(string uri) + { + Uri = uri; + } +} \ No newline at end of file