web-check/api/robots-txt.js

67 lines
1.5 KiB
JavaScript
Raw Normal View History

const axios = require('axios');
const middleware = require('./_common/middleware');
const parseRobotsTxt = (content) => {
const lines = content.split('\n');
const rules = [];
lines.forEach(line => {
line = line.trim(); // This removes trailing and leading whitespaces
let match = line.match(/^(Allow|Disallow):\s*(\S*)$/i);
if (match) {
const rule = {
lbl: match[1],
val: match[2],
};
rules.push(rule);
} else {
match = line.match(/^(User-agent):\s*(\S*)$/i);
if (match) {
const rule = {
lbl: match[1],
val: match[2],
};
rules.push(rule);
}
}
});
return { robots: rules };
}
const handler = async function(url) {
let parsedURL;
try {
parsedURL = new URL(url);
} catch (error) {
return {
statusCode: 400,
body: JSON.stringify({ error: 'Invalid url query parameter' }),
};
}
const robotsURL = `${parsedURL.protocol}//${parsedURL.hostname}/robots.txt`;
try {
const response = await axios.get(robotsURL);
if (response.status === 200) {
return parseRobotsTxt(response.data);
} else {
return {
statusCode: response.status,
body: JSON.stringify({ error: 'Failed to fetch robots.txt', statusCode: response.status }),
};
}
} catch (error) {
return {
statusCode: 500,
body: JSON.stringify({ error: `Error fetching robots.txt: ${error.message}` }),
};
}
};
exports.handler = middleware(handler);