Implement middleware into newer lambdas

This commit is contained in:
Alicia Sykes
2023-08-10 14:26:36 +01:00
21 changed files with 604 additions and 173 deletions

View File

@ -29,6 +29,19 @@ const handler = async (url) => {
const internalLinks = [...internalLinksMap.entries()].sort((a, b) => b[1] - a[1]).map(entry => entry[0]);
const externalLinks = [...externalLinksMap.entries()].sort((a, b) => b[1] - a[1]).map(entry => entry[0]);
// If there were no links, then mark as skipped and show reasons
if (internalLinks.length === 0 && externalLinks.length === 0) {
return {
statusCode: 400,
body: JSON.stringify({
skipped: 'No internal or external links found. '
+ 'This may be due to the website being dynamically rendered, using a client-side framework (like React), and without SSR enabled. '
+ 'That would mean that the static HTML returned from the HTTP request doesn\'t contain any meaningful content for Web-Check to analyze. '
+ 'You can rectify this by using a headless browser to render the page instead.',
}),
};
}
return { internal: internalLinks, external: externalLinks };
};

View File

@ -34,6 +34,13 @@ const handler = async (url) => {
}).on('error', reject);
});
if (!carbonData.statistics || (carbonData.statistics.adjustedBytes === 0 && carbonData.statistics.energy === 0)) {
return {
statusCode: 200,
body: JSON.stringify({ skipped: 'Not enough info to get carbon data' }),
};
}
carbonData.scanUrl = url;
return carbonData;
} catch (error) {

79
api/mail-config.js Normal file
View File

@ -0,0 +1,79 @@
const dns = require('dns').promises;
const URL = require('url-parse');
exports.handler = async (event, context) => {
try {
let domain = event.queryStringParameters.url;
const parsedUrl = new URL(domain);
domain = parsedUrl.hostname || parsedUrl.pathname;
// Get MX records
const mxRecords = await dns.resolveMx(domain);
// Get TXT records
const txtRecords = await dns.resolveTxt(domain);
// Filter for only email related TXT records (SPF, DKIM, DMARC, and certain provider verifications)
const emailTxtRecords = txtRecords.filter(record => {
const recordString = record.join('');
return (
recordString.startsWith('v=spf1') ||
recordString.startsWith('v=DKIM1') ||
recordString.startsWith('v=DMARC1') ||
recordString.startsWith('protonmail-verification=') ||
recordString.startsWith('google-site-verification=') || // Google Workspace
recordString.startsWith('MS=') || // Microsoft 365
recordString.startsWith('zoho-verification=') || // Zoho
recordString.startsWith('titan-verification=') || // Titan
recordString.includes('bluehost.com') // BlueHost
);
});
// Identify specific mail services
const mailServices = emailTxtRecords.map(record => {
const recordString = record.join('');
if (recordString.startsWith('protonmail-verification=')) {
return { provider: 'ProtonMail', value: recordString.split('=')[1] };
} else if (recordString.startsWith('google-site-verification=')) {
return { provider: 'Google Workspace', value: recordString.split('=')[1] };
} else if (recordString.startsWith('MS=')) {
return { provider: 'Microsoft 365', value: recordString.split('=')[1] };
} else if (recordString.startsWith('zoho-verification=')) {
return { provider: 'Zoho', value: recordString.split('=')[1] };
} else if (recordString.startsWith('titan-verification=')) {
return { provider: 'Titan', value: recordString.split('=')[1] };
} else if (recordString.includes('bluehost.com')) {
return { provider: 'BlueHost', value: recordString };
} else {
return null;
}
}).filter(record => record !== null);
// Check MX records for Yahoo
const yahooMx = mxRecords.filter(record => record.exchange.includes('yahoodns.net'));
if (yahooMx.length > 0) {
mailServices.push({ provider: 'Yahoo', value: yahooMx[0].exchange });
}
return {
statusCode: 200,
body: JSON.stringify({
mxRecords,
txtRecords: emailTxtRecords,
mailServices,
}),
};
} catch (error) {
if (error.code === 'ENOTFOUND' || error.code === 'ENODATA') {
return {
statusCode: 200,
body: JSON.stringify({ skipped: 'No mail server in use on this domain' }),
};
} else {
return {
statusCode: 500,
body: JSON.stringify({ error: error.message }),
};
}
}
};

View File

@ -1,33 +1,64 @@
const commonMiddleware = require('./_common/middleware');
const axios = require('axios');
const xml2js = require('xml2js');
const middleware = require('./_common/middleware');
const fetchSitemapHandler = async (url) => {
let sitemapUrl;
const handler = async (url) => {
let sitemapUrl = `${url}/sitemap.xml`;
try {
// Fetch robots.txt
const robotsRes = await axios.get(`${url}/robots.txt`);
const robotsTxt = robotsRes.data.split('\n');
// Try to fetch sitemap directly
let sitemapRes;
try {
sitemapRes = await axios.get(sitemapUrl, { timeout: 5000 });
} catch (error) {
if (error.response && error.response.status === 404) {
// If sitemap not found, try to fetch it from robots.txt
const robotsRes = await axios.get(`${url}/robots.txt`, { timeout: 5000 });
const robotsTxt = robotsRes.data.split('\n');
for (let line of robotsTxt) {
if (line.startsWith('Sitemap:')) {
sitemapUrl = line.split(' ')[1];
for (let line of robotsTxt) {
if (line.toLowerCase().startsWith('sitemap:')) {
sitemapUrl = line.split(' ')[1].trim();
break;
}
}
if (!sitemapUrl) {
return {
statusCode: 404,
body: JSON.stringify({ skipped: 'No sitemap found' }),
};
}
sitemapRes = await axios.get(sitemapUrl, { timeout: 5000 });
} else {
throw error; // If other error, throw it
}
}
if (!sitemapUrl) {
throw new Error('Sitemap not found in robots.txt');
}
const parser = new xml2js.Parser();
const sitemap = await parser.parseStringPromise(sitemapRes.data);
// Fetch sitemap
const sitemapRes = await axios.get(sitemapUrl);
const sitemap = await xml2js.parseStringPromise(sitemapRes.data);
return sitemap;
return {
statusCode: 200,
body: JSON.stringify(sitemap),
};
} catch (error) {
throw new Error(error.message);
// If error occurs
console.log(error.message);
if (error.code === 'ECONNABORTED') {
return {
statusCode: 500,
body: JSON.stringify({ error: 'Request timed out' }),
};
} else {
return {
statusCode: 500,
body: JSON.stringify({ error: error.message }),
};
}
}
};
exports.handler = middleware(fetchSitemapHandler);
exports.handler = commonMiddleware(handler);

68
api/social-tags.js Normal file
View File

@ -0,0 +1,68 @@
const axios = require('axios');
const cheerio = require('cheerio');
exports.handler = async (event, context) => {
let url = event.queryStringParameters.url;
// Check if url includes protocol
if (!url.startsWith('http://') && !url.startsWith('https://')) {
url = 'http://' + url;
}
try {
const response = await axios.get(url);
const html = response.data;
const $ = cheerio.load(html);
const metadata = {
// Basic meta tags
title: $('head title').text(),
description: $('meta[name="description"]').attr('content'),
keywords: $('meta[name="keywords"]').attr('content'),
canonicalUrl: $('link[rel="canonical"]').attr('href'),
// OpenGraph Protocol
ogTitle: $('meta[property="og:title"]').attr('content'),
ogType: $('meta[property="og:type"]').attr('content'),
ogImage: $('meta[property="og:image"]').attr('content'),
ogUrl: $('meta[property="og:url"]').attr('content'),
ogDescription: $('meta[property="og:description"]').attr('content'),
ogSiteName: $('meta[property="og:site_name"]').attr('content'),
// Twitter Cards
twitterCard: $('meta[name="twitter:card"]').attr('content'),
twitterSite: $('meta[name="twitter:site"]').attr('content'),
twitterCreator: $('meta[name="twitter:creator"]').attr('content'),
twitterTitle: $('meta[name="twitter:title"]').attr('content'),
twitterDescription: $('meta[name="twitter:description"]').attr('content'),
twitterImage: $('meta[name="twitter:image"]').attr('content'),
// Misc
themeColor: $('meta[name="theme-color"]').attr('content'),
robots: $('meta[name="robots"]').attr('content'),
googlebot: $('meta[name="googlebot"]').attr('content'),
generator: $('meta[name="generator"]').attr('content'),
viewport: $('meta[name="viewport"]').attr('content'),
author: $('meta[name="author"]').attr('content'),
publisher: $('link[rel="publisher"]').attr('href'),
favicon: $('link[rel="icon"]').attr('href')
};
if (Object.keys(metadata).length === 0) {
return {
statusCode: 200,
body: JSON.stringify({ skipped: 'No metadata found' }),
};
}
return {
statusCode: 200,
body: JSON.stringify(metadata),
};
} catch (error) {
return {
statusCode: 500,
body: JSON.stringify({ error: 'Failed fetching data' }),
};
}
};