2024-05-06 22:51:32 +02:00
|
|
|
import axios from 'axios';
|
|
|
|
import middleware from './_common/middleware.js';
|
2023-08-10 20:30:21 +02:00
|
|
|
|
|
|
|
/**
 * Parses the text of a robots.txt file into a flat, ordered list of directives.
 *
 * Only `User-agent`, `Allow` and `Disallow` lines are kept; every other line
 * (full-line comments, blank lines, `Sitemap`, `Crawl-delay`, ...) is ignored.
 * Directive names are matched case-insensitively and reported exactly as they
 * were written in the file.
 *
 * @param {string} content - Raw robots.txt body.
 * @returns {{ robots: Array<{ lbl: string, val: string }> }} The directives in
 *   file order; `lbl` is the directive name and `val` its (possibly empty) value.
 *   A non-string `content` yields an empty list instead of throwing.
 */
const parseRobotsTxt = (content) => {
  // A non-text body (for example one the HTTP client already decoded as JSON)
  // cannot contain directives, so report "no rules" rather than crashing.
  if (typeof content !== 'string') {
    return { robots: [] };
  }

  // Whitespace is tolerated on both sides of the colon.
  const directivePattern = /^(Allow|Disallow|User-agent)\s*:\s*(\S*)$/i;
  const rules = [];

  // Accept LF, CRLF and lone-CR line endings.
  content.split(/\r\n|\r|\n/).forEach((rawLine) => {
    // Everything from the first '#' is a comment; without stripping it, a
    // directive followed by an inline comment would fail the end-anchored
    // pattern and be silently dropped.
    const hashIndex = rawLine.indexOf('#');
    const withoutComment = hashIndex === -1 ? rawLine : rawLine.slice(0, hashIndex);
    const match = withoutComment.trim().match(directivePattern);

    if (match) {
      rules.push({ lbl: match[1], val: match[2] });
    }
  });

  return { robots: rules };
};
|
|
|
|
|
2024-05-06 22:51:32 +02:00
|
|
|
/**
 * Fetches and parses the robots.txt of the site that `url` belongs to.
 *
 * @param {string} url - Absolute URL of any page on the target site.
 * @returns {Promise<object>} One of:
 *   - the result of `parseRobotsTxt` (`{ robots: [...] }`) on success;
 *   - `{ skipped: string }` when the file yields no recognised directives;
 *   - `{ statusCode, body }` (with `body` a JSON string) when `url` is invalid
 *     (400), the server answered with a non-200 status (that status), or the
 *     request itself failed (500).
 */
const robotsHandler = async function(url) {
  let parsedURL;
  try {
    parsedURL = new URL(url);
  } catch (error) {
    return {
      statusCode: 400,
      body: JSON.stringify({ error: 'Invalid url query parameter' }),
    };
  }

  // `host` keeps a non-default port, whereas `hostname` drops it and would
  // send the request for e.g. http://example.com:8080 to port 80 instead.
  const robotsURL = `${parsedURL.protocol}//${parsedURL.host}/robots.txt`;

  // Shared shape for "the server answered, but not with 200".
  const upstreamFailure = (status) => ({
    statusCode: status,
    body: JSON.stringify({ error: 'Failed to fetch robots.txt', statusCode: status }),
  });

  try {
    const response = await axios.get(robotsURL);

    if (response.status !== 200) {
      return upstreamFailure(response.status);
    }

    const parsedData = parseRobotsTxt(response.data);
    if (!parsedData.robots || parsedData.robots.length === 0) {
      return { skipped: 'No robots.txt file present, unable to continue' };
    }
    return parsedData;
  } catch (error) {
    // With its default settings axios rejects on non-2xx responses and
    // attaches the response to the error; report the real upstream status
    // instead of masking it as a generic 500.
    if (error.response && error.response.status) {
      return upstreamFailure(error.response.status);
    }
    return {
      statusCode: 500,
      body: JSON.stringify({ error: `Error fetching robots.txt: ${error.message}` }),
    };
  }
};
|
|
|
|
|
2024-05-06 22:51:32 +02:00
|
|
|
// Named export: robotsHandler wrapped by the shared middleware imported from
// ./_common/middleware.js.
export const handler = middleware(robotsHandler);
|
|
|
|
// Expose the same `handler` binding as the module's default export.
export default handler;
|