Fetch and display externally linked content hrefs

This commit is contained in:
Alicia Sykes 2023-07-27 22:06:40 +01:00
parent e2e16c9439
commit 6a854af79e
6 changed files with 255 additions and 60 deletions

48
api/content-links.js Normal file
View File

@ -0,0 +1,48 @@
const axios = require('axios');
const cheerio = require('cheerio');
const urlLib = require('url');
exports.handler = async (event, context) => {
let url = event.queryStringParameters.url;
// Check if url includes protocol
if (!url.startsWith('http://') && !url.startsWith('https://')) {
url = 'http://' + url;
}
try {
const response = await axios.get(url);
const html = response.data;
const $ = cheerio.load(html);
const internalLinksMap = new Map();
const externalLinksMap = new Map();
$('a[href]').each((i, link) => {
const href = $(link).attr('href');
const absoluteUrl = urlLib.resolve(url, href);
if (absoluteUrl.startsWith(url)) {
const count = internalLinksMap.get(absoluteUrl) || 0;
internalLinksMap.set(absoluteUrl, count + 1);
} else if (href.startsWith('http://') || href.startsWith('https://')) {
const count = externalLinksMap.get(absoluteUrl) || 0;
externalLinksMap.set(absoluteUrl, count + 1);
}
});
// Convert maps to sorted arrays
const internalLinks = [...internalLinksMap.entries()].sort((a, b) => b[1] - a[1]).map(entry => entry[0]);
const externalLinks = [...externalLinksMap.entries()].sort((a, b) => b[1] - a[1]).map(entry => entry[0]);
return {
statusCode: 200,
body: JSON.stringify({ internal: internalLinks, external: externalLinks }),
};
} catch (error) {
console.log(error);
return {
statusCode: 500,
body: JSON.stringify({ error: 'Failed fetching data' }),
};
}
};

View File

@ -36,6 +36,7 @@
"@types/react-simple-maps": "^3.0.0",
"@types/styled-components": "^5.1.26",
"axios": "^1.4.0",
"cheerio": "^1.0.0-rc.12",
"chrome-aws-lambda": "^10.1.0",
"flatted": "^3.2.7",
"follow-redirects": "^1.15.2",

View File

@ -0,0 +1,89 @@
import { Card } from 'components/Form/Card';
import Row from 'components/Form/Row';
import Heading from 'components/Form/Heading';
import colors from 'styles/colors';
const cardStyles = `
small { margin-top: 1rem; opacity: 0.5; }
a {
color: ${colors.textColor};
}
details {
// display: inline;
display: flex;
transition: all 0.2s ease-in-out;
h3 {
display: inline;
}
summary {
padding: 0;
margin: 1rem 0 0 0;
cursor: pointer;
}
summary:before {
content: "►";
position: absolute;
margin-left: -1rem;
color: ${colors.primary};
cursor: pointer;
}
&[open] summary:before {
content: "▼";
}
}
`;
const getPathName = (link: string) => {
try {
const url = new URL(link);
return url.pathname;
} catch(e) {
return link;
}
};
const ContentLinksCard = (props: { data: any, title: string, actionButtons: any }): JSX.Element => {
const { internal, external} = props.data;
console.log('Internal Links', internal);
console.log('External Links', external);
return (
<Card heading={props.title} actionButtons={props.actionButtons} styles={cardStyles}>
<Heading as="h3" size="small" color={colors.primary}>Summary</Heading>
<Row lbl="Internal Link Count" val={internal.length} />
<Row lbl="External Link Count" val={external.length} />
{ internal && internal.length > 0 && (
<details>
<summary><Heading as="h3" size="small" color={colors.primary}>Internal Links</Heading></summary>
{internal.map((link: string) => (
<Row key={link} lbl="" val="">
<a href={link} target="_blank" rel="noreferrer">{getPathName(link)}</a>
</Row>
))}
</details>
)}
{ external && external.length > 0 && (
<details>
<summary><Heading as="h3" size="small" color={colors.primary}>External Links</Heading></summary>
{external.map((link: string) => (
<Row key={link} lbl="" val="">
<a href={link} target="_blank" rel="noreferrer">{link}</a>
</Row>
))}
</details>
)}
{/* {portData.openPorts.map((port: any) => (
<Row key={port} lbl="" val="">
<span>{port}</span>
</Row>
)
)}
<br />
<small>
Unable to establish connections to:<br />
{portData.failedPorts.join(', ')}
</small> */}
</Card>
);
}
export default ContentLinksCard;

View File

@ -202,6 +202,7 @@ const jobNames = [
'sitemap',
'hsts',
'security-txt',
'linked-pages',
// 'whois',
'features',
'carbon',

View File

@ -1,5 +1,5 @@
import { useState, useEffect, useCallback, ReactNode } from 'react';
import { useParams } from "react-router-dom";
import { useParams } from 'react-router-dom';
import styled from 'styled-components';
import { ToastContainer } from 'react-toastify';
import Masonry from 'react-masonry-css'
@ -10,10 +10,15 @@ import Modal from 'components/Form/Modal';
import Footer from 'components/misc/Footer';
import Nav from 'components/Form/Nav';
import { RowProps } from 'components/Form/Row';
import ErrorBoundary from 'components/misc/ErrorBoundary';
import docs from 'utils/docs';
import Loader from 'components/misc/Loader';
import ErrorBoundary from 'components/misc/ErrorBoundary';
import SelfScanMsg from 'components/misc/SelfScanMsg';
import DocContent from 'components/misc/DocContent';
import ProgressBar, { LoadingJob, LoadingState, initialJobs } from 'components/misc/ProgressBar';
import ActionButtons from 'components/misc/ActionButtons';
import AdditionalResources from 'components/misc/AdditionalResources';
import ViewRaw from 'components/misc/ViewRaw';
import ServerLocationCard from 'components/Results/ServerLocation';
import ServerInfoCard from 'components/Results/ServerInfo';
@ -40,17 +45,11 @@ import DomainLookup from 'components/Results/DomainLookup';
import DnsServerCard from 'components/Results/DnsServer';
import TechStackCard from 'components/Results/TechStack';
import SecurityTxtCard from 'components/Results/SecurityTxt';
import SelfScanMsg from 'components/misc/SelfScanMsg';
import ContentLinksCard from 'components/Results/ContentLinks';
import ProgressBar, { LoadingJob, LoadingState, initialJobs } from 'components/misc/ProgressBar';
import ActionButtons from 'components/misc/ActionButtons';
import keys from 'utils/get-keys';
import { determineAddressType, AddressType } from 'utils/address-type-checker';
import useMotherHook from 'hooks/motherOfAllHooks';
import {
getLocation, ServerLocation,
parseCookies, Cookie,
@ -80,25 +79,6 @@ const ResultsContent = styled.section`
padding-bottom: 1rem;
`;
const JobDocsContainer = styled.div`
p.doc-desc, p.doc-uses, ul {
margin: 0.25rem auto 1.5rem auto;
}
ul {
padding: 0 0.5rem 0 1rem;
}
ul li a {
color: ${colors.primary};
}
summary { color: ${colors.primary};}
h4 {
border-top: 1px solid ${colors.primary};
color: ${colors.primary};
opacity: 0.75;
padding: 0.5rem 0;
}
`;
const Results = (): JSX.Element => {
const startTime = new Date().getTime();
@ -400,6 +380,14 @@ const Results = (): JSX.Element => {
fetchRequest: () => fetch(`${api}/dns-server?url=${address}`).then(res => parseJson(res)),
});
// Get list of links included in the page content
const [linkedPagesResults, updateLinkedPagesResults] = useMotherHook({
jobId: 'linked-pages',
updateLoadingJobs,
addressInfo: { address, addressType, expectedAddressTypes: urlTypeOnly },
fetchRequest: () => fetch(`${api}/content-links?url=${address}`).then(res => parseJson(res)),
});
/* Cancel remaining jobs after 10 second timeout */
useEffect(() => {
const checkJobs = () => {
@ -438,7 +426,6 @@ const Results = (): JSX.Element => {
{ id: 'server-info', title: 'Server Info', result: shoadnResults?.serverInfo, Component: ServerInfoCard, refresh: updateShodanResults },
{ id: 'redirects', title: 'Redirects', result: redirectResults, Component: RedirectsCard, refresh: updateRedirectResults },
{ id: 'robots-txt', title: 'Crawl Rules', result: robotsTxtResults, Component: RobotsTxtCard, refresh: updateRobotsTxtResults },
{ id: 'sitemap', title: 'Pages', result: sitemapResults, Component: SitemapCard, refresh: updateSitemapResults },
{ id: 'dnssec', title: 'DNSSEC', result: dnsSecResults, Component: DnsSecCard, refresh: updateDnsSecResults },
{ id: 'status', title: 'Server Status', result: serverStatusResults, Component: ServerStatusCard, refresh: updateServerStatusResults },
{ id: 'ports', title: 'Open Ports', result: portsResults, Component: OpenPortsCard, refresh: updatePortsResults },
@ -448,8 +435,11 @@ const Results = (): JSX.Element => {
{ id: 'hsts', title: 'HSTS Check', result: hstsResults, Component: HstsCard, refresh: updateHstsResults },
{ id: 'whois', title: 'Domain Info', result: whoIsResults, Component: WhoIsCard, refresh: updateWhoIsResults },
{ id: 'dns-server', title: 'DNS Server', result: dnsServerResults, Component: DnsServerCard, refresh: updateDnsServerResults },
{ id: 'linked-pages', title: 'Linked Pages', result: linkedPagesResults, Component: ContentLinksCard, refresh: updateLinkedPagesResults },
{ id: 'features', title: 'Site Features', result: siteFeaturesResults, Component: SiteFeaturesCard, refresh: updateSiteFeaturesResults },
{ id: 'sitemap', title: 'Pages', result: sitemapResults, Component: SitemapCard, refresh: updateSitemapResults },
{ id: 'carbon', title: 'Carbon Footprint', result: carbonResults, Component: CarbonFootprintCard, refresh: updateCarbonResults },
];
const MakeActionButtons = (title: string, refresh: () => void, showInfo: (id: string) => void): ReactNode => {
@ -463,34 +453,7 @@ const Results = (): JSX.Element => {
};
const showInfo = (id: string) => {
const doc = docs.filter((doc: any) => doc.id === id)[0] || null;
setModalContent(
doc? (<JobDocsContainer>
<Heading as="h3" size="medium" color={colors.primary}>{doc.title}</Heading>
<Heading as="h4" size="small">About</Heading>
<p className="doc-desc">{doc.description}</p>
<Heading as="h4" size="small">Use Cases</Heading>
<p className="doc-uses">{doc.use}</p>
<Heading as="h4" size="small">Links</Heading>
<ul>
{doc.resources.map((resource: string | { title: string, link: string } , index: number) => (
typeof resource === 'string' ? (
<li id={`link-${index}`}><a target="_blank" rel="noreferrer" href={resource}>{resource}</a></li>
) : (
<li id={`link-${index}`}><a target="_blank" rel="noreferrer" href={resource.link}>{resource.title}</a></li>
)
))}
</ul>
<details>
<summary><Heading as="h4" size="small">Example</Heading></summary>
<img width="300" src={doc.screenshot} alt="Screenshot" />
</details>
</JobDocsContainer>)
: (
<JobDocsContainer>
<p>No Docs provided for this widget yet</p>
</JobDocsContainer>
));
setModalContent(DocContent(id));
setModalOpen(true);
};
@ -534,6 +497,8 @@ const Results = (): JSX.Element => {
}
</Masonry>
</ResultsContent>
<ViewRaw everything={resultCardData} />
<AdditionalResources url={address} />
<Footer />
<Modal isOpen={modalOpen} closeModal={()=> setModalOpen(false)}>{modalContent}</Modal>
<ToastContainer limit={3} draggablePercent={60} autoClose={2500} theme="dark" position="bottom-right" />

View File

@ -5256,6 +5256,31 @@ check-types@^11.1.1:
resolved "https://registry.yarnpkg.com/check-types/-/check-types-11.2.2.tgz#7afc0b6a860d686885062f2dba888ba5710335b4"
integrity sha512-HBiYvXvn9Z70Z88XKjz3AEKd4HJhBXsa3j7xFnITAzoS8+q6eIGi8qDB8FKPBAjtuxjI/zFpwuiCb8oDtKOYrA==
cheerio-select@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-2.1.0.tgz#4d8673286b8126ca2a8e42740d5e3c4884ae21b4"
integrity sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==
dependencies:
boolbase "^1.0.0"
css-select "^5.1.0"
css-what "^6.1.0"
domelementtype "^2.3.0"
domhandler "^5.0.3"
domutils "^3.0.1"
cheerio@^1.0.0-rc.12:
version "1.0.0-rc.12"
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683"
integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==
dependencies:
cheerio-select "^2.1.0"
dom-serializer "^2.0.0"
domhandler "^5.0.3"
domutils "^3.0.1"
htmlparser2 "^8.0.1"
parse5 "^7.0.0"
parse5-htmlparser2-tree-adapter "^7.0.0"
chokidar@3.5.3, chokidar@^3.4.0, chokidar@^3.4.2, chokidar@^3.5.3:
version "3.5.3"
resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.3.tgz#1cf37c8707b932bd1af1ae22c0432e2acd1903bd"
@ -5976,6 +6001,17 @@ css-select@^4.1.3:
domutils "^2.8.0"
nth-check "^2.0.1"
css-select@^5.1.0:
version "5.1.0"
resolved "https://registry.yarnpkg.com/css-select/-/css-select-5.1.0.tgz#b8ebd6554c3637ccc76688804ad3f6a6fdaea8a6"
integrity sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==
dependencies:
boolbase "^1.0.0"
css-what "^6.1.0"
domhandler "^5.0.2"
domutils "^3.0.1"
nth-check "^2.0.1"
css-to-react-native@^3.2.0:
version "3.2.0"
resolved "https://registry.yarnpkg.com/css-to-react-native/-/css-to-react-native-3.2.0.tgz#cdd8099f71024e149e4f6fe17a7d46ecd55f1e32"
@ -6006,7 +6042,7 @@ css-what@^3.2.1:
resolved "https://registry.yarnpkg.com/css-what/-/css-what-3.4.2.tgz#ea7026fcb01777edbde52124e21f327e7ae950e4"
integrity sha512-ACUm3L0/jiZTqfzRM3Hi9Q8eZqd6IK37mMWPLz9PJxkLWllYeRf+EHUSHYEtFop2Eqytaq1FizFVh7XfBnXCDQ==
css-what@^6.0.1:
css-what@^6.0.1, css-what@^6.1.0:
version "6.1.0"
resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4"
integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==
@ -6593,12 +6629,21 @@ dom-serializer@^1.0.1:
domhandler "^4.2.0"
entities "^2.0.0"
dom-serializer@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-2.0.0.tgz#e41b802e1eedf9f6cae183ce5e622d789d7d8e53"
integrity sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==
dependencies:
domelementtype "^2.3.0"
domhandler "^5.0.2"
entities "^4.2.0"
domelementtype@1:
version "1.3.1"
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.1.tgz#d048c44b37b0d10a7f2a3d5fee3f4333d790481f"
integrity sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w==
domelementtype@^2.0.1, domelementtype@^2.2.0:
domelementtype@^2.0.1, domelementtype@^2.2.0, domelementtype@^2.3.0:
version "2.3.0"
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.3.0.tgz#5c45e8e869952626331d7aab326d01daf65d589d"
integrity sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==
@ -6617,6 +6662,13 @@ domhandler@^4.0.0, domhandler@^4.2.0, domhandler@^4.3.1:
dependencies:
domelementtype "^2.2.0"
domhandler@^5.0.2, domhandler@^5.0.3:
version "5.0.3"
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-5.0.3.tgz#cc385f7f751f1d1fc650c21374804254538c7d31"
integrity sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==
dependencies:
domelementtype "^2.3.0"
domutils@^1.7.0:
version "1.7.0"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.7.0.tgz#56ea341e834e06e6748af7a1cb25da67ea9f8c2a"
@ -6634,6 +6686,15 @@ domutils@^2.5.2, domutils@^2.8.0:
domelementtype "^2.2.0"
domhandler "^4.2.0"
domutils@^3.0.1:
version "3.1.0"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.1.0.tgz#c47f551278d3dc4b0b1ab8cbb42d751a6f0d824e"
integrity sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==
dependencies:
dom-serializer "^2.0.0"
domelementtype "^2.3.0"
domhandler "^5.0.3"
dot-case@^3.0.4:
version "3.0.4"
resolved "https://registry.yarnpkg.com/dot-case/-/dot-case-3.0.4.tgz#9b2b670d00a431667a8a75ba29cd1b98809ce751"
@ -6770,6 +6831,11 @@ entities@^2.0.0, entities@^2.2.0:
resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55"
integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==
entities@^4.2.0, entities@^4.4.0:
version "4.5.0"
resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48"
integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==
env-paths@3.0.0, env-paths@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/env-paths/-/env-paths-3.0.0.tgz#2f1e89c2f6dbd3408e1b1711dd82d62e317f58da"
@ -8542,6 +8608,16 @@ htmlparser2@^6.1.0:
domutils "^2.5.2"
entities "^2.0.0"
htmlparser2@^8.0.1:
version "8.0.2"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.2.tgz#f002151705b383e62433b5cf466f5b716edaec21"
integrity sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==
dependencies:
domelementtype "^2.3.0"
domhandler "^5.0.3"
domutils "^3.0.1"
entities "^4.4.0"
http-cache-semantics@^4.1.1:
version "4.1.1"
resolved "https://registry.yarnpkg.com/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz#abe02fcb2985460bf0323be664436ec3476a6d5a"
@ -11800,11 +11876,26 @@ parse-ms@^3.0.0:
resolved "https://registry.yarnpkg.com/parse-ms/-/parse-ms-3.0.0.tgz#3ea24a934913345fcc3656deda72df921da3a70e"
integrity sha512-Tpb8Z7r7XbbtBTrM9UhpkzzaMrqA2VXMT3YChzYltwV3P3pM6t8wl7TvpMnSTosz1aQAdVib7kdoys7vYOPerw==
parse5-htmlparser2-tree-adapter@^7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz#23c2cc233bcf09bb7beba8b8a69d46b08c62c2f1"
integrity sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==
dependencies:
domhandler "^5.0.2"
parse5 "^7.0.0"
parse5@6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b"
integrity sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==
parse5@^7.0.0:
version "7.1.2"
resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.1.2.tgz#0736bebbfd77793823240a23b7fc5e010b7f8e32"
integrity sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==
dependencies:
entities "^4.4.0"
parseurl@~1.3.2, parseurl@~1.3.3:
version "1.3.3"
resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4"