add a crawler schedule

This commit is contained in:
Kenneth Bingham 2024-01-12 00:11:41 -05:00
parent 94d2e3aef6
commit d894367ee4
No known key found for this signature in database
GPG Key ID: 31709281860130B6

View File

@ -1,43 +1,44 @@
// This file is not part of the Docusaurus site; it is a copy of the active configuration in the Algolia Crawler.
new Crawler({
appId: 'CO73R59OLO',
apiKey: 'ALGOLIA_CRAWLER_API_KEY',
appId: "CO73R59OLO",
apiKey: "ALGOLIA_CRAWLER_API_KEY",
rateLimit: 8,
maxDepth: 10,
startUrls: ['https://docs.zrok.io/'],
sitemaps: ['https://docs.zrok.io/sitemap.xml'],
ignoreCanonicalTo: true,
discoveryPatterns: ['https://docs.zrok.io/**'],
startUrls: ["https://docs.zrok.io/"],
sitemaps: ["https://docs.zrok.io/sitemap.xml"],
ignoreCanonicalTo: false,
discoveryPatterns: ["https://docs.zrok.io/**"],
schedule: "every 1 day at 2:22 pm",
actions: [
{
indexName: 'zrok',
pathsToMatch: ['https://docs.zrok.io/**'],
indexName: "zrok",
pathsToMatch: ["https://docs.zrok.io/**"],
// Builds the search records for a crawled page: computes a fallback top-level
// heading (lvl0) from the page's navigation, then hands the selector map to
// helpers.docsearch() and returns its result.
recordExtractor: ({ $, helpers }) => {
// lvl0 priority order: deepest active sublist header -> active navbar item -> "Documentation"
const lvl0 =
$(
'.menu__link.menu__link--sublist.menu__link--active, .navbar__item.navbar__link--active'
".menu__link.menu__link--sublist.menu__link--active, .navbar__item.navbar__link--active",
)
// keep only the final element of the matched set; if its text is empty,
// the || below falls through to the literal default
.last()
.text() || 'Documentation';
.text() || "Documentation";
return helpers.docsearch({
recordProps: {
lvl0: {
// empty selector: presumably forces the computed lvl0 above to be used
// via defaultValue — confirm against the DocSearch helper docs
selectors: '',
selectors: "",
defaultValue: lvl0,
},
// lvl1..lvl6 map to page headings h1..h6 inside the article (lvl1 also
// accepts an h1 inside <header>)
lvl1: ['header h1', 'article h1'],
lvl2: 'article h2',
lvl3: 'article h3',
lvl4: 'article h4',
lvl5: 'article h5, article td:first-child',
lvl6: 'article h6',
content: 'article p, article li, article td:last-child',
lvl1: ["header h1", "article h1"],
lvl2: "article h2",
lvl3: "article h3",
lvl4: "article h4",
// first table cell of each row is selected alongside h5 as a lvl5 heading;
// the last cell of each row is selected as content below
lvl5: "article h5, article td:first-child",
lvl6: "article h6",
content: "article p, article li, article td:last-child",
},
indexHeadings: true,
aggregateContent: true,
recordVersion: 'v3',
recordVersion: "v3",
});
},
},
@ -45,51 +46,51 @@ new Crawler({
initialIndexSettings: {
zrok: {
attributesForFaceting: [
'type',
'lang',
'language',
'version',
'docusaurus_tag',
"type",
"lang",
"language",
"version",
"docusaurus_tag",
],
attributesToRetrieve: [
'hierarchy',
'content',
'anchor',
'url',
'url_without_anchor',
'type',
"hierarchy",
"content",
"anchor",
"url",
"url_without_anchor",
"type",
],
attributesToHighlight: ['hierarchy', 'content'],
attributesToSnippet: ['content:10'],
camelCaseAttributes: ['hierarchy', 'content'],
attributesToHighlight: ["hierarchy", "content"],
attributesToSnippet: ["content:10"],
camelCaseAttributes: ["hierarchy", "content"],
searchableAttributes: [
'unordered(hierarchy.lvl0)',
'unordered(hierarchy.lvl1)',
'unordered(hierarchy.lvl2)',
'unordered(hierarchy.lvl3)',
'unordered(hierarchy.lvl4)',
'unordered(hierarchy.lvl5)',
'unordered(hierarchy.lvl6)',
'content',
"unordered(hierarchy.lvl0)",
"unordered(hierarchy.lvl1)",
"unordered(hierarchy.lvl2)",
"unordered(hierarchy.lvl3)",
"unordered(hierarchy.lvl4)",
"unordered(hierarchy.lvl5)",
"unordered(hierarchy.lvl6)",
"content",
],
distinct: true,
attributeForDistinct: 'url',
attributeForDistinct: "url",
customRanking: [
'desc(weight.pageRank)',
'desc(weight.level)',
'asc(weight.position)',
"desc(weight.pageRank)",
"desc(weight.level)",
"asc(weight.position)",
],
ranking: [
'words',
'filters',
'typo',
'attribute',
'proximity',
'exact',
'custom',
"words",
"filters",
"typo",
"attribute",
"proximity",
"exact",
"custom",
],
highlightPreTag: '<span class="algolia-docsearch-suggestion--highlight">',
highlightPostTag: '</span>',
highlightPostTag: "</span>",
minWordSizefor1Typo: 3,
minWordSizefor2Typos: 7,
allowTyposOnNumericTokens: false,
@ -97,8 +98,8 @@ new Crawler({
ignorePlurals: true,
advancedSyntax: true,
attributeCriteriaComputedByMinProximity: true,
removeWordsIfNoResults: 'allOptional',
separatorsToIndex: '_',
removeWordsIfNoResults: "allOptional",
separatorsToIndex: "_",
},
},
});