web-check/api/sitemap.js

55 lines
1.5 KiB
JavaScript
Raw Normal View History

const middleware = require('./_common/middleware');
const axios = require('axios');
const xml2js = require('xml2js');
/**
 * Extracts the target of the first non-empty `Sitemap:` directive from a
 * robots.txt body.
 *
 * The directive name is matched case-insensitively, surrounding whitespace
 * (including `\r` from CRLF files) is ignored, and the space after the colon
 * is optional.
 *
 * @param {*} robotsTxt - Raw robots.txt response body.
 * @returns {string|null} The sitemap URL, or `null` when the body is not a
 *   string or contains no usable directive.
 */
const findSitemapInRobots = (robotsTxt) => {
  if (typeof robotsTxt !== 'string') {
    return null;
  }
  const directive = 'sitemap:';
  for (const rawLine of robotsTxt.split('\n')) {
    const line = rawLine.trim();
    if (!line.toLowerCase().startsWith(directive)) {
      continue;
    }
    // URLs contain no whitespace, so the first token after the colon is the
    // URL; anything following it (e.g. a trailing comment) is ignored.
    const [candidate] = line.slice(directive.length).trim().split(/\s+/);
    if (candidate) {
      return candidate;
    }
  }
  return null;
};

/**
 * Fetches and parses a site's XML sitemap.
 *
 * Requests `${url}/sitemap.xml` first. If that responds with HTTP 404, it
 * requests `${url}/robots.txt` and follows the first `Sitemap:` directive
 * found there. Every request uses a 5000 ms timeout.
 *
 * @param {string} url - Base URL of the site; paths are appended with a
 *   leading `/`.
 * @returns {Promise<object>} One of:
 *   - the object produced by xml2js for the sitemap document;
 *   - `{ skipped: 'No sitemap found' }` when sitemap.xml is a 404 and
 *     robots.txt names no sitemap;
 *   - `{ error: string }` for timeouts, other request failures (including a
 *     failing robots.txt request) and XML parse errors.
 */
const handler = async (url) => {
  const hardTimeOut = 5000;
  const requestOptions = { timeout: hardTimeOut };

  try {
    let sitemapRes;
    try {
      // Try to fetch the sitemap from its conventional location.
      sitemapRes = await axios.get(`${url}/sitemap.xml`, requestOptions);
    } catch (error) {
      const isNotFound = Boolean(error.response) && error.response.status === 404;
      if (!isNotFound) {
        throw error; // Only a 404 triggers the robots.txt fallback.
      }

      const robotsRes = await axios.get(`${url}/robots.txt`, requestOptions);
      const sitemapUrl = findSitemapInRobots(robotsRes.data);
      if (!sitemapUrl) {
        // Nothing to follow; do not re-request the URL that just 404'd.
        return { skipped: 'No sitemap found' };
      }
      sitemapRes = await axios.get(sitemapUrl, requestOptions);
    }

    const parser = new xml2js.Parser();
    // Awaited inside the try so parse failures are reported as `{ error }`.
    const sitemap = await parser.parseStringPromise(sitemapRes.data);
    return sitemap;
  } catch (error) {
    // axios reports a request timeout with the code 'ECONNABORTED'.
    if (error.code === 'ECONNABORTED') {
      return { error: `Request timed-out after ${hardTimeOut}ms` };
    }
    return { error: error.message };
  }
};
// Export the middleware-wrapped handler as the module's default export.
// NOTE(review): `middleware` comes from ./_common/middleware and presumably
// adapts `handler` to the hosting platform's request/response interface —
// confirm there.
module.exports = middleware(handler);
// Also expose a wrapped handler under the named `handler` property. This is a
// second `middleware(handler)` call, so it is a separate wrapper instance from
// the default export above.
module.exports.handler = middleware(handler);