2024-05-06 22:51:32 +02:00
|
|
|
import axios from 'axios';
|
|
|
|
import xml2js from 'xml2js';
|
|
|
|
import middleware from './_common/middleware.js';
|
2023-08-10 15:26:36 +02:00
|
|
|
|
2024-05-06 22:51:32 +02:00
|
|
|
/**
 * Finds the first usable `Sitemap:` entry in the body of a robots.txt file.
 *
 * The directive name is matched case-insensitively, leading whitespace is
 * ignored, and the whitespace after the colon is optional, so both
 * `Sitemap: https://x/s.xml` and `sitemap:https://x/s.xml` are recognised.
 * Entries with an empty value are skipped.
 *
 * @param {unknown} robotsTxt - Response body of the robots.txt request.
 * @returns {string|null} The sitemap URL, or `null` when none is declared.
 */
const findSitemapInRobotsTxt = (robotsTxt) => {
  // A non-string body cannot contain a directive we can read.
  if (typeof robotsTxt !== 'string') {
    return null;
  }

  for (const line of robotsTxt.split(/\r?\n/)) {
    const match = /^\s*sitemap:\s*(\S+)/i.exec(line);
    if (match) {
      return match[1];
    }
  }

  return null;
};

/**
 * Fetches and parses the XML sitemap of a site.
 *
 * First requests `${url}/sitemap.xml`. If that responds with HTTP 404, the
 * site's `${url}/robots.txt` is fetched and its first `Sitemap:` entry is
 * requested instead. Each HTTP request is limited to 5000 ms.
 *
 * @param {string} url - Base URL of the site, without a trailing slash.
 * @returns {Promise<object>} One of:
 *   - the object produced by xml2js for the sitemap document,
 *   - `{ skipped: 'No sitemap found' }` when /sitemap.xml is a 404 and
 *     robots.txt declares no sitemap,
 *   - `{ error: string }` on a timeout or any other failure (never rejects).
 */
const sitemapHandler = async (url) => {
  const hardTimeOut = 5000;
  const requestOptions = { timeout: hardTimeOut };

  try {
    let sitemapRes;
    try {
      // Try to fetch the sitemap from its conventional location first.
      sitemapRes = await axios.get(`${url}/sitemap.xml`, requestOptions);
    } catch (error) {
      // Only a 404 triggers the robots.txt fallback; anything else is a
      // genuine failure and is reported by the outer handler.
      if (!(error.response && error.response.status === 404)) {
        throw error;
      }

      const robotsRes = await axios.get(`${url}/robots.txt`, requestOptions);
      const sitemapUrl = findSitemapInRobotsTxt(robotsRes.data);

      // Do not re-request the URL that just returned 404: without a
      // robots.txt entry there is nothing left to try.
      if (!sitemapUrl) {
        return { skipped: 'No sitemap found' };
      }

      sitemapRes = await axios.get(sitemapUrl, requestOptions);
    }

    const parser = new xml2js.Parser();
    return await parser.parseStringPromise(sitemapRes.data);
  } catch (error) {
    // Checked because the original code treats this code as axios' timeout.
    if (error.code === 'ECONNABORTED') {
      return { error: `Request timed-out after ${hardTimeOut}ms` };
    }
    return { error: error.message };
  }
};
|
2023-08-09 23:33:36 +02:00
|
|
|
|
2024-05-06 22:51:32 +02:00
|
|
|
// Pass the raw sitemap handler through the shared `middleware` wrapper
// (imported from ./_common/middleware.js) and expose the result both as the
// named `handler` export and as the module's default export.
const handler = middleware(sitemapHandler);

export { handler, handler as default };
|
2023-09-03 13:27:04 +02:00
|
|
|
|