mirror of
https://github.com/openziti/zrok.git
synced 2024-11-28 19:14:07 +01:00
add a crawler schedule
This commit is contained in:
parent
94d2e3aef6
commit
d894367ee4
@ -1,43 +1,44 @@
|
|||||||
// this is not part of the Docusaurus site, but a copy of the active config in the Algolia Crawler
|
// this is not part of the Docusaurus site, but a copy of the active config in the Algolia Crawler
|
||||||
new Crawler({
|
new Crawler({
|
||||||
appId: 'CO73R59OLO',
|
appId: "CO73R59OLO",
|
||||||
apiKey: 'ALGOLIA_CRAWLER_API_KEY',
|
apiKey: "ALGOLIA_CRAWLER_API_KEY",
|
||||||
rateLimit: 8,
|
rateLimit: 8,
|
||||||
maxDepth: 10,
|
maxDepth: 10,
|
||||||
startUrls: ['https://docs.zrok.io/'],
|
startUrls: ["https://docs.zrok.io/"],
|
||||||
sitemaps: ['https://docs.zrok.io/sitemap.xml'],
|
sitemaps: ["https://docs.zrok.io/sitemap.xml"],
|
||||||
ignoreCanonicalTo: true,
|
ignoreCanonicalTo: false,
|
||||||
discoveryPatterns: ['https://docs.zrok.io/**'],
|
discoveryPatterns: ["https://docs.zrok.io/**"],
|
||||||
|
schedule: "every 1 day at 2:22 pm",
|
||||||
actions: [
|
actions: [
|
||||||
{
|
{
|
||||||
indexName: 'zrok',
|
indexName: "zrok",
|
||||||
pathsToMatch: ['https://docs.zrok.io/**'],
|
pathsToMatch: ["https://docs.zrok.io/**"],
|
||||||
recordExtractor: ({ $, helpers }) => {
|
recordExtractor: ({ $, helpers }) => {
|
||||||
// priority order: deepest active sub list header -> navbar active item -> 'Documentation'
|
// priority order: deepest active sub list header -> navbar active item -> 'Documentation'
|
||||||
const lvl0 =
|
const lvl0 =
|
||||||
$(
|
$(
|
||||||
'.menu__link.menu__link--sublist.menu__link--active, .navbar__item.navbar__link--active'
|
".menu__link.menu__link--sublist.menu__link--active, .navbar__item.navbar__link--active",
|
||||||
)
|
)
|
||||||
.last()
|
.last()
|
||||||
.text() || 'Documentation';
|
.text() || "Documentation";
|
||||||
|
|
||||||
return helpers.docsearch({
|
return helpers.docsearch({
|
||||||
recordProps: {
|
recordProps: {
|
||||||
lvl0: {
|
lvl0: {
|
||||||
selectors: '',
|
selectors: "",
|
||||||
defaultValue: lvl0,
|
defaultValue: lvl0,
|
||||||
},
|
},
|
||||||
lvl1: ['header h1', 'article h1'],
|
lvl1: ["header h1", "article h1"],
|
||||||
lvl2: 'article h2',
|
lvl2: "article h2",
|
||||||
lvl3: 'article h3',
|
lvl3: "article h3",
|
||||||
lvl4: 'article h4',
|
lvl4: "article h4",
|
||||||
lvl5: 'article h5, article td:first-child',
|
lvl5: "article h5, article td:first-child",
|
||||||
lvl6: 'article h6',
|
lvl6: "article h6",
|
||||||
content: 'article p, article li, article td:last-child',
|
content: "article p, article li, article td:last-child",
|
||||||
},
|
},
|
||||||
indexHeadings: true,
|
indexHeadings: true,
|
||||||
aggregateContent: true,
|
aggregateContent: true,
|
||||||
recordVersion: 'v3',
|
recordVersion: "v3",
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -45,51 +46,51 @@ new Crawler({
|
|||||||
initialIndexSettings: {
|
initialIndexSettings: {
|
||||||
zrok: {
|
zrok: {
|
||||||
attributesForFaceting: [
|
attributesForFaceting: [
|
||||||
'type',
|
"type",
|
||||||
'lang',
|
"lang",
|
||||||
'language',
|
"language",
|
||||||
'version',
|
"version",
|
||||||
'docusaurus_tag',
|
"docusaurus_tag",
|
||||||
],
|
],
|
||||||
attributesToRetrieve: [
|
attributesToRetrieve: [
|
||||||
'hierarchy',
|
"hierarchy",
|
||||||
'content',
|
"content",
|
||||||
'anchor',
|
"anchor",
|
||||||
'url',
|
"url",
|
||||||
'url_without_anchor',
|
"url_without_anchor",
|
||||||
'type',
|
"type",
|
||||||
],
|
],
|
||||||
attributesToHighlight: ['hierarchy', 'content'],
|
attributesToHighlight: ["hierarchy", "content"],
|
||||||
attributesToSnippet: ['content:10'],
|
attributesToSnippet: ["content:10"],
|
||||||
camelCaseAttributes: ['hierarchy', 'content'],
|
camelCaseAttributes: ["hierarchy", "content"],
|
||||||
searchableAttributes: [
|
searchableAttributes: [
|
||||||
'unordered(hierarchy.lvl0)',
|
"unordered(hierarchy.lvl0)",
|
||||||
'unordered(hierarchy.lvl1)',
|
"unordered(hierarchy.lvl1)",
|
||||||
'unordered(hierarchy.lvl2)',
|
"unordered(hierarchy.lvl2)",
|
||||||
'unordered(hierarchy.lvl3)',
|
"unordered(hierarchy.lvl3)",
|
||||||
'unordered(hierarchy.lvl4)',
|
"unordered(hierarchy.lvl4)",
|
||||||
'unordered(hierarchy.lvl5)',
|
"unordered(hierarchy.lvl5)",
|
||||||
'unordered(hierarchy.lvl6)',
|
"unordered(hierarchy.lvl6)",
|
||||||
'content',
|
"content",
|
||||||
],
|
],
|
||||||
distinct: true,
|
distinct: true,
|
||||||
attributeForDistinct: 'url',
|
attributeForDistinct: "url",
|
||||||
customRanking: [
|
customRanking: [
|
||||||
'desc(weight.pageRank)',
|
"desc(weight.pageRank)",
|
||||||
'desc(weight.level)',
|
"desc(weight.level)",
|
||||||
'asc(weight.position)',
|
"asc(weight.position)",
|
||||||
],
|
],
|
||||||
ranking: [
|
ranking: [
|
||||||
'words',
|
"words",
|
||||||
'filters',
|
"filters",
|
||||||
'typo',
|
"typo",
|
||||||
'attribute',
|
"attribute",
|
||||||
'proximity',
|
"proximity",
|
||||||
'exact',
|
"exact",
|
||||||
'custom',
|
"custom",
|
||||||
],
|
],
|
||||||
highlightPreTag: '<span class="algolia-docsearch-suggestion--highlight">',
|
highlightPreTag: '<span class="algolia-docsearch-suggestion--highlight">',
|
||||||
highlightPostTag: '</span>',
|
highlightPostTag: "</span>",
|
||||||
minWordSizefor1Typo: 3,
|
minWordSizefor1Typo: 3,
|
||||||
minWordSizefor2Typos: 7,
|
minWordSizefor2Typos: 7,
|
||||||
allowTyposOnNumericTokens: false,
|
allowTyposOnNumericTokens: false,
|
||||||
@ -97,8 +98,8 @@ new Crawler({
|
|||||||
ignorePlurals: true,
|
ignorePlurals: true,
|
||||||
advancedSyntax: true,
|
advancedSyntax: true,
|
||||||
attributeCriteriaComputedByMinProximity: true,
|
attributeCriteriaComputedByMinProximity: true,
|
||||||
removeWordsIfNoResults: 'allOptional',
|
removeWordsIfNoResults: "allOptional",
|
||||||
separatorsToIndex: '_',
|
separatorsToIndex: "_",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
});
|
});
|
Loading…
Reference in New Issue
Block a user