From 0de8d9a7e22f9d7a240ed9ecf69998499cd7d005 Mon Sep 17 00:00:00 2001
From: Kenneth Bingham
Date: Tue, 14 Feb 2023 15:23:46 -0500
Subject: [PATCH] commit algolia crawler config for reference

---
 website/algolia-crawler-config.js | 113 ++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 website/algolia-crawler-config.js

diff --git a/website/algolia-crawler-config.js b/website/algolia-crawler-config.js
new file mode 100644
index 00000000..799c3679
--- /dev/null
+++ b/website/algolia-crawler-config.js
@@ -0,0 +1,113 @@
+// This file is not part of the Docusaurus site; it is a copy of the active
+// config in the Algolia Crawler, committed for reference only. The crawler
+// starts at https://docs.zrok.io/ and writes its records to the `zrok` index.
+new Crawler({
+  appId: 'CO73R59OLO',
+  // placeholder standing in for the real crawler API key
+  apiKey: 'ALGOLIA_CRAWLER_API_KEY',
+  rateLimit: 8,
+  maxDepth: 10,
+  startUrls: ['https://docs.zrok.io/'],
+  sitemaps: ['https://docs.zrok.io/sitemap.xml'],
+  ignoreCanonicalTo: true,
+  discoveryPatterns: ['https://docs.zrok.io/**'],
+  actions: [
+    {
+      indexName: 'zrok',
+      pathsToMatch: ['https://docs.zrok.io/**'],
+      // Builds the records for one crawled page: `$` queries the page with CSS
+      // selectors and `helpers.docsearch` turns the selectors below into records.
+      recordExtractor: ({ $, helpers }) => {
+        // priority order: deepest active sub list header -> navbar active item -> 'Documentation'
+        // (`.last()` keeps the final match; empty text falls back to 'Documentation')
+        const lvl0 =
+          $(
+            '.menu__link.menu__link--sublist.menu__link--active, .navbar__item.navbar__link--active'
+          )
+            .last()
+            .text() || 'Documentation';
+
+        return helpers.docsearch({
+          recordProps: {
+            // empty selector, so lvl0 always takes the value computed above
+            lvl0: {
+              selectors: '',
+              defaultValue: lvl0,
+            },
+            lvl1: ['header h1', 'article h1'],
+            lvl2: 'article h2',
+            lvl3: 'article h3',
+            lvl4: 'article h4',
+            lvl5: 'article h5, article td:first-child',
+            lvl6: 'article h6',
+            content: 'article p, article li, article td:last-child',
+          },
+          indexHeadings: true,
+          aggregateContent: true,
+          recordVersion: 'v3',
+        });
+      },
+    },
+  ],
+  initialIndexSettings: {
+    zrok: {
+      attributesForFaceting: [
+        'type',
+        'lang',
+        'language',
+        'version',
+        'docusaurus_tag',
+      ],
+      attributesToRetrieve: [
+        'hierarchy',
+        'content',
+        'anchor',
+        'url',
+        'url_without_anchor',
+        'type',
+      ],
+      attributesToHighlight: ['hierarchy', 'content'],
+      attributesToSnippet: ['content:10'],
+      camelCaseAttributes: ['hierarchy', 'content'],
+      searchableAttributes: [
+        'unordered(hierarchy.lvl0)',
+        'unordered(hierarchy.lvl1)',
+        'unordered(hierarchy.lvl2)',
+        'unordered(hierarchy.lvl3)',
+        'unordered(hierarchy.lvl4)',
+        'unordered(hierarchy.lvl5)',
+        'unordered(hierarchy.lvl6)',
+        'content',
+      ],
+      distinct: true,
+      attributeForDistinct: 'url',
+      customRanking: [
+        'desc(weight.pageRank)',
+        'desc(weight.level)',
+        'asc(weight.position)',
+      ],
+      ranking: [
+        'words',
+        'filters',
+        'typo',
+        'attribute',
+        'proximity',
+        'exact',
+        'custom',
+      ],
+      // Empty tags would leave highlighted matches without any markup; these are
+      // the values used by the DocSearch crawler template.
+      highlightPreTag: '<span class="algolia-docsearch-suggestion--highlight">',
+      highlightPostTag: '</span>',
+      minWordSizefor1Typo: 3,
+      minWordSizefor2Typos: 7,
+      allowTyposOnNumericTokens: false,
+      minProximity: 1,
+      ignorePlurals: true,
+      advancedSyntax: true,
+      attributeCriteriaComputedByMinProximity: true,
+      removeWordsIfNoResults: 'allOptional',
+      separatorsToIndex: '_',
+    },
+  },
+});