add a crawler schedule

This commit is contained in:
Kenneth Bingham 2024-01-12 00:11:41 -05:00
parent 94d2e3aef6
commit d894367ee4
No known key found for this signature in database
GPG Key ID: 31709281860130B6

View File

@ -1,43 +1,44 @@
// this is not part of the Docusaurus site, but a copy of the active config in the Algolia Crawler // this is not part of the Docusaurus site, but a copy of the active config in the Algolia Crawler
new Crawler({ new Crawler({
appId: 'CO73R59OLO', appId: "CO73R59OLO",
apiKey: 'ALGOLIA_CRAWLER_API_KEY', apiKey: "ALGOLIA_CRAWLER_API_KEY",
rateLimit: 8, rateLimit: 8,
maxDepth: 10, maxDepth: 10,
startUrls: ['https://docs.zrok.io/'], startUrls: ["https://docs.zrok.io/"],
sitemaps: ['https://docs.zrok.io/sitemap.xml'], sitemaps: ["https://docs.zrok.io/sitemap.xml"],
ignoreCanonicalTo: true, ignoreCanonicalTo: false,
discoveryPatterns: ['https://docs.zrok.io/**'], discoveryPatterns: ["https://docs.zrok.io/**"],
schedule: "every 1 day at 2:22 pm",
actions: [ actions: [
{ {
indexName: 'zrok', indexName: "zrok",
pathsToMatch: ['https://docs.zrok.io/**'], pathsToMatch: ["https://docs.zrok.io/**"],
recordExtractor: ({ $, helpers }) => { recordExtractor: ({ $, helpers }) => {
// priority order: deepest active sub list header -> navbar active item -> 'Documentation' // priority order: deepest active sub list header -> navbar active item -> 'Documentation'
const lvl0 = const lvl0 =
$( $(
'.menu__link.menu__link--sublist.menu__link--active, .navbar__item.navbar__link--active' ".menu__link.menu__link--sublist.menu__link--active, .navbar__item.navbar__link--active",
) )
.last() .last()
.text() || 'Documentation'; .text() || "Documentation";
return helpers.docsearch({ return helpers.docsearch({
recordProps: { recordProps: {
lvl0: { lvl0: {
selectors: '', selectors: "",
defaultValue: lvl0, defaultValue: lvl0,
}, },
lvl1: ['header h1', 'article h1'], lvl1: ["header h1", "article h1"],
lvl2: 'article h2', lvl2: "article h2",
lvl3: 'article h3', lvl3: "article h3",
lvl4: 'article h4', lvl4: "article h4",
lvl5: 'article h5, article td:first-child', lvl5: "article h5, article td:first-child",
lvl6: 'article h6', lvl6: "article h6",
content: 'article p, article li, article td:last-child', content: "article p, article li, article td:last-child",
}, },
indexHeadings: true, indexHeadings: true,
aggregateContent: true, aggregateContent: true,
recordVersion: 'v3', recordVersion: "v3",
}); });
}, },
}, },
@ -45,51 +46,51 @@ new Crawler({
initialIndexSettings: { initialIndexSettings: {
zrok: { zrok: {
attributesForFaceting: [ attributesForFaceting: [
'type', "type",
'lang', "lang",
'language', "language",
'version', "version",
'docusaurus_tag', "docusaurus_tag",
], ],
attributesToRetrieve: [ attributesToRetrieve: [
'hierarchy', "hierarchy",
'content', "content",
'anchor', "anchor",
'url', "url",
'url_without_anchor', "url_without_anchor",
'type', "type",
], ],
attributesToHighlight: ['hierarchy', 'content'], attributesToHighlight: ["hierarchy", "content"],
attributesToSnippet: ['content:10'], attributesToSnippet: ["content:10"],
camelCaseAttributes: ['hierarchy', 'content'], camelCaseAttributes: ["hierarchy", "content"],
searchableAttributes: [ searchableAttributes: [
'unordered(hierarchy.lvl0)', "unordered(hierarchy.lvl0)",
'unordered(hierarchy.lvl1)', "unordered(hierarchy.lvl1)",
'unordered(hierarchy.lvl2)', "unordered(hierarchy.lvl2)",
'unordered(hierarchy.lvl3)', "unordered(hierarchy.lvl3)",
'unordered(hierarchy.lvl4)', "unordered(hierarchy.lvl4)",
'unordered(hierarchy.lvl5)', "unordered(hierarchy.lvl5)",
'unordered(hierarchy.lvl6)', "unordered(hierarchy.lvl6)",
'content', "content",
], ],
distinct: true, distinct: true,
attributeForDistinct: 'url', attributeForDistinct: "url",
customRanking: [ customRanking: [
'desc(weight.pageRank)', "desc(weight.pageRank)",
'desc(weight.level)', "desc(weight.level)",
'asc(weight.position)', "asc(weight.position)",
], ],
ranking: [ ranking: [
'words', "words",
'filters', "filters",
'typo', "typo",
'attribute', "attribute",
'proximity', "proximity",
'exact', "exact",
'custom', "custom",
], ],
highlightPreTag: '<span class="algolia-docsearch-suggestion--highlight">', highlightPreTag: '<span class="algolia-docsearch-suggestion--highlight">',
highlightPostTag: '</span>', highlightPostTag: "</span>",
minWordSizefor1Typo: 3, minWordSizefor1Typo: 3,
minWordSizefor2Typos: 7, minWordSizefor2Typos: 7,
allowTyposOnNumericTokens: false, allowTyposOnNumericTokens: false,
@ -97,8 +98,8 @@ new Crawler({
ignorePlurals: true, ignorePlurals: true,
advancedSyntax: true, advancedSyntax: true,
attributeCriteriaComputedByMinProximity: true, attributeCriteriaComputedByMinProximity: true,
removeWordsIfNoResults: 'allOptional', removeWordsIfNoResults: "allOptional",
separatorsToIndex: '_', separatorsToIndex: "_",
}, },
}, },
}); });