Re-wrote sitemap lambda function

This commit is contained in:
Alicia Sykes 2023-07-29 09:38:25 +01:00
parent abff9283dc
commit df24445ac6

View File

@@ -2,40 +2,60 @@ const axios = require('axios');
const xml2js = require('xml2js'); const xml2js = require('xml2js');
exports.handler = async (event) => { exports.handler = async (event) => {
const baseUrl = event.queryStringParameters.url.replace(/^(?:https?:\/\/)?/i, ""); const url = event.queryStringParameters.url;
const url = baseUrl.startsWith('http') ? baseUrl : `http://${baseUrl}`; let sitemapUrl = `${url}/sitemap.xml`;
let sitemapUrl;
try { try {
// Fetch robots.txt // Try to fetch sitemap directly
const robotsRes = await axios.get(`${url}/robots.txt`); let sitemapRes;
const robotsTxt = robotsRes.data.split('\n'); try {
sitemapRes = await axios.get(sitemapUrl, { timeout: 5000 });
} catch (error) {
if (error.response && error.response.status === 404) {
// If sitemap not found, try to fetch it from robots.txt
const robotsRes = await axios.get(`${url}/robots.txt`, { timeout: 5000 });
const robotsTxt = robotsRes.data.split('\n');
for (let line of robotsTxt) { for (let line of robotsTxt) {
if (line.startsWith('Sitemap:')) { if (line.toLowerCase().startsWith('sitemap:')) {
sitemapUrl = line.split(' ')[1]; sitemapUrl = line.split(' ')[1].trim();
break;
}
}
if (!sitemapUrl) {
return {
statusCode: 404,
body: JSON.stringify({ skipped: 'No sitemap found' }),
};
}
sitemapRes = await axios.get(sitemapUrl, { timeout: 5000 });
} else {
throw error; // If other error, throw it
} }
} }
if (!sitemapUrl) { const parser = new xml2js.Parser();
return { const sitemap = await parser.parseStringPromise(sitemapRes.data);
statusCode: 404,
body: JSON.stringify({ error: 'Sitemap not found in robots.txt' }),
};
}
// Fetch sitemap
const sitemapRes = await axios.get(sitemapUrl);
const sitemap = await xml2js.parseStringPromise(sitemapRes.data);
return { return {
statusCode: 200, statusCode: 200,
body: JSON.stringify(sitemap), body: JSON.stringify(sitemap),
}; };
} catch (error) { } catch (error) {
return { // If error occurs
statusCode: 500, console.log(error.message);
body: JSON.stringify({ error: error.message }), if (error.code === 'ECONNABORTED') {
}; return {
statusCode: 500,
body: JSON.stringify({ error: 'Request timed out' }),
};
} else {
return {
statusCode: 500,
body: JSON.stringify({ error: error.message }),
};
}
} }
}; };