zrok/website/algolia-crawler-config.js
2024-01-12 00:11:41 -05:00

105 lines
3.0 KiB
JavaScript

// this is not part of the Docusaurus site, but a copy of the active config in the Algolia Crawler
new Crawler({
appId: "CO73R59OLO",
apiKey: "ALGOLIA_CRAWLER_API_KEY",
rateLimit: 8,
maxDepth: 10,
startUrls: ["https://docs.zrok.io/"],
sitemaps: ["https://docs.zrok.io/sitemap.xml"],
ignoreCanonicalTo: false,
discoveryPatterns: ["https://docs.zrok.io/**"],
schedule: "every 1 day at 2:22 pm",
actions: [
{
indexName: "zrok",
pathsToMatch: ["https://docs.zrok.io/**"],
recordExtractor: ({ $, helpers }) => {
// priority order: deepest active sub list header -> navbar active item -> 'Documentation'
const lvl0 =
$(
".menu__link.menu__link--sublist.menu__link--active, .navbar__item.navbar__link--active",
)
.last()
.text() || "Documentation";
return helpers.docsearch({
recordProps: {
lvl0: {
selectors: "",
defaultValue: lvl0,
},
lvl1: ["header h1", "article h1"],
lvl2: "article h2",
lvl3: "article h3",
lvl4: "article h4",
lvl5: "article h5, article td:first-child",
lvl6: "article h6",
content: "article p, article li, article td:last-child",
},
indexHeadings: true,
aggregateContent: true,
recordVersion: "v3",
});
},
},
],
initialIndexSettings: {
zrok: {
attributesForFaceting: [
"type",
"lang",
"language",
"version",
"docusaurus_tag",
],
attributesToRetrieve: [
"hierarchy",
"content",
"anchor",
"url",
"url_without_anchor",
"type",
],
attributesToHighlight: ["hierarchy", "content"],
attributesToSnippet: ["content:10"],
camelCaseAttributes: ["hierarchy", "content"],
searchableAttributes: [
"unordered(hierarchy.lvl0)",
"unordered(hierarchy.lvl1)",
"unordered(hierarchy.lvl2)",
"unordered(hierarchy.lvl3)",
"unordered(hierarchy.lvl4)",
"unordered(hierarchy.lvl5)",
"unordered(hierarchy.lvl6)",
"content",
],
distinct: true,
attributeForDistinct: "url",
customRanking: [
"desc(weight.pageRank)",
"desc(weight.level)",
"asc(weight.position)",
],
ranking: [
"words",
"filters",
"typo",
"attribute",
"proximity",
"exact",
"custom",
],
highlightPreTag: '<span class="algolia-docsearch-suggestion--highlight">',
highlightPostTag: "</span>",
minWordSizefor1Typo: 3,
minWordSizefor2Typos: 7,
allowTyposOnNumericTokens: false,
minProximity: 1,
ignorePlurals: true,
advancedSyntax: true,
attributeCriteriaComputedByMinProximity: true,
removeWordsIfNoResults: "allOptional",
separatorsToIndex: "_",
},
},
});