mirror of
https://github.com/Lissy93/web-check.git
synced 2025-01-10 00:08:32 +01:00
Find, parse and render info from Sitemap
This commit is contained in:
parent
fc030ffcd6
commit
7ebe96b9be
@ -124,6 +124,11 @@
|
||||
to = "/.netlify/functions/tech-stack"
|
||||
status = 301
|
||||
force = true
|
||||
[[redirects]]
|
||||
from = "/sitemap"
|
||||
to = "/.netlify/functions/sitemap"
|
||||
status = 301
|
||||
force = true
|
||||
|
||||
# For router history mode, ensure pages land on index
|
||||
[[redirects]]
|
||||
|
@ -33,7 +33,8 @@
|
||||
"tsparticles": "^2.0.6",
|
||||
"typescript": "^4.7.3",
|
||||
"wappalyzer": "^6.10.63",
|
||||
"web-vitals": "^2.1.4"
|
||||
"web-vitals": "^2.1.4",
|
||||
"xml2js": "^0.6.0"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "react-scripts start",
|
||||
|
41
server/lambda/sitemap.js
Normal file
41
server/lambda/sitemap.js
Normal file
@ -0,0 +1,41 @@
|
||||
const axios = require('axios');
|
||||
const xml2js = require('xml2js');
|
||||
|
||||
exports.handler = async (event) => {
|
||||
const baseUrl = event.queryStringParameters.url.replace(/^(?:https?:\/\/)?/i, "");
|
||||
const url = baseUrl.startsWith('http') ? baseUrl : `http://${baseUrl}`;
|
||||
let sitemapUrl;
|
||||
|
||||
try {
|
||||
// Fetch robots.txt
|
||||
const robotsRes = await axios.get(`${url}/robots.txt`);
|
||||
const robotsTxt = robotsRes.data.split('\n');
|
||||
|
||||
for (let line of robotsTxt) {
|
||||
if (line.startsWith('Sitemap:')) {
|
||||
sitemapUrl = line.split(' ')[1];
|
||||
}
|
||||
}
|
||||
|
||||
if (!sitemapUrl) {
|
||||
return {
|
||||
statusCode: 404,
|
||||
body: JSON.stringify({ error: 'Sitemap not found in robots.txt' }),
|
||||
};
|
||||
}
|
||||
|
||||
// Fetch sitemap
|
||||
const sitemapRes = await axios.get(sitemapUrl);
|
||||
const sitemap = await xml2js.parseStringPromise(sitemapRes.data);
|
||||
|
||||
return {
|
||||
statusCode: 200,
|
||||
body: JSON.stringify(sitemap),
|
||||
};
|
||||
} catch (error) {
|
||||
return {
|
||||
statusCode: 500,
|
||||
body: JSON.stringify({ error: error.message }),
|
||||
};
|
||||
}
|
||||
};
|
58
src/components/Results/Sitemap.tsx
Normal file
58
src/components/Results/Sitemap.tsx
Normal file
@ -0,0 +1,58 @@
|
||||
|
||||
import { Card } from 'components/Form/Card';
|
||||
import Heading from 'components/Form/Heading';
|
||||
import Row, { ExpandableRow } from 'components/Form/Row';
|
||||
import colors from 'styles/colors';
|
||||
|
||||
/**
 * CSS fragment handed to the sitemap card through its `styles` prop.
 * Caps the card height at 50rem with vertical scrolling, colours links with
 * `colors.primary`, and shows `<small>` text as a half-opacity block with a
 * 1rem top margin.
 */
const cardStyles = `
|
||||
max-height: 50rem;
|
||||
overflow-y: auto;
|
||||
a {
|
||||
color: ${colors.primary};
|
||||
}
|
||||
small {
|
||||
margin-top: 1rem;
|
||||
opacity: 0.5;
|
||||
display: block;
|
||||
a { color: ${colors.primary}; }
|
||||
}
|
||||
`;
|
||||
|
||||
/**
 * Results card listing the pages found in a site's sitemap.
 *
 * `props.data` is the sitemap converted to JSON. Two shapes are rendered:
 *  - a URL set (`url` or `urlset.url`): one expandable row per page, labelled
 *    with the page path and expanding to last-modified / change-frequency /
 *    priority when present;
 *  - a sitemap index (`sitemapindex.sitemap`): a short notice followed by one
 *    link per child sitemap.
 * Element values are read as one-item arrays (`loc[0]`, `lastmod[0]`, ...).
 *
 * @param props.data - Parsed sitemap object.
 * @param props.title - Heading shown on the card.
 * @param props.actionButtons - Passed through to the card unchanged.
 */
const SitemapCard = (props: {data: any, title: string, actionButtons: any }): JSX.Element => {
  // Anything that is not an array counts as "no entries", so `.map` cannot throw.
  const asList = (value: unknown): any[] => (Array.isArray(value) ? value : []);
  const normalSiteMap = asList(props.data?.url || props.data?.urlset?.url);
  const siteMapIndex = asList(props.data?.sitemapindex?.sitemap);

  const makeExpandableRowData = (site: any) => {
    const results: { lbl: string, val: any }[] = [];
    if (site.lastmod) { results.push({lbl: 'Last Modified', val: site.lastmod[0]}); }
    if (site.changefreq) { results.push({lbl: 'Change Frequency', val: site.changefreq[0]}); }
    if (site.priority) { results.push({lbl: 'Priority', val: site.priority[0]}); }
    return results;
  };

  // First <loc> value of an entry, or '' when it is missing or not plain text.
  const getLoc = (entry: any): string => {
    const loc = entry?.loc?.[0];
    return typeof loc === 'string' ? loc.trim() : '';
  };

  const parseUrl = (url: string): URL | null => {
    try {
      return new URL(url);
    } catch {
      return null;
    }
  };

  // Sitemaps are third-party data: a relative or malformed <loc> must not
  // crash the whole card, so fall back to showing the raw text.
  const getPathFromUrl = (url: string) => parseUrl(url)?.pathname ?? url;

  // Only http(s) targets become clickable; other schemes (e.g. `javascript:`)
  // coming from the remote sitemap are shown as plain text.
  const isWebLink = (url: string): boolean => {
    const parsed = parseUrl(url);
    return parsed !== null && (parsed.protocol === 'http:' || parsed.protocol === 'https:');
  };

  return (
    <Card heading={props.title} actionButtons={props.actionButtons} styles={cardStyles}>
      {
        normalSiteMap.map((subpage: any, index: number) => {
          const loc = getLoc(subpage);
          return (
            <ExpandableRow
              key={`${index}-${loc}`}
              lbl={getPathFromUrl(loc)}
              val=""
              rowList={makeExpandableRowData(subpage)}
            ></ExpandableRow>
          );
        })
      }
      { siteMapIndex.length > 0 && <p>
        This site returns a sitemap index, which is a list of sitemaps.
      </p>}
      {
        siteMapIndex.map((subpage: any, index: number) => {
          const loc = getLoc(subpage);
          return (
            <Row key={`${index}-${loc}`} lbl="" val="">
              {isWebLink(loc) ? <a href={loc}>{getPathFromUrl(loc)}</a> : <span>{loc}</span>}
            </Row>
          );
        })
      }
    </Card>
  );
}
|
||||
|
||||
export default SitemapCard;
|
@ -190,7 +190,7 @@ const jobNames = [
|
||||
'domain-lookup',
|
||||
'tech-stack',
|
||||
'hosts',
|
||||
'lighthouse',
|
||||
'quality',
|
||||
'cookies',
|
||||
'server-info',
|
||||
'redirects',
|
||||
@ -200,8 +200,9 @@ const jobNames = [
|
||||
'ports',
|
||||
'screenshot',
|
||||
'txt-records',
|
||||
'sitemap',
|
||||
'hsts',
|
||||
'whois',
|
||||
// 'whois',
|
||||
'features',
|
||||
'carbon',
|
||||
'trace-route',
|
||||
|
@ -35,6 +35,7 @@ import CarbonFootprintCard from 'components/Results/CarbonFootprint';
|
||||
import SiteFeaturesCard from 'components/Results/SiteFeatures';
|
||||
import DnsSecCard from 'components/Results/DnsSec';
|
||||
import HstsCard from 'components/Results/Hsts';
|
||||
import SitemapCard from 'components/Results/Sitemap';
|
||||
import DomainLookup from 'components/Results/DomainLookup';
|
||||
import DnsServerCard from 'components/Results/DnsServer';
|
||||
import TechStackCard from 'components/Results/TechStack';
|
||||
@ -60,24 +61,11 @@ import {
|
||||
const ResultsOuter = styled.div`
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
.my-masonry-grid {
|
||||
display: -webkit-box; /* Not needed if autoprefixing */
|
||||
display: -ms-flexbox; /* Not needed if autoprefixing */
|
||||
.masonry-grid {
|
||||
display: flex;
|
||||
// margin: 1rem;
|
||||
// margin-left: -30px; /* gutter size offset */
|
||||
width: auto;
|
||||
}
|
||||
.my-masonry-grid_column {
|
||||
// margin-left: 30px; /* gutter size */
|
||||
background-clip: padding-box;
|
||||
}
|
||||
|
||||
/* Style your items */
|
||||
.my-masonry-grid_column > div { /* change div to reference your elements you put in <Masonry> */
|
||||
// background: grey;
|
||||
// margin-bottom: 30px;
|
||||
}
|
||||
.masonry-grid-col section { margin: 1rem 0.5rem; }
|
||||
`;
|
||||
|
||||
const ResultsContent = styled.section`
|
||||
@ -165,7 +153,6 @@ const Results = (): JSX.Element => {
|
||||
}, []);
|
||||
|
||||
const parseJson = (response: Response): Promise<any> => {
|
||||
// return response.json()
|
||||
return new Promise((resolve) => {
|
||||
if (response.ok) {
|
||||
response.json()
|
||||
@ -181,20 +168,6 @@ const Results = (): JSX.Element => {
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
|
||||
// const parseJson = (response: Response): Promise<any> => {
|
||||
// if (response.status >= 400) {
|
||||
// return new Promise((resolve) => resolve({ error: `Failed to fetch data: ${response.statusText}` }));
|
||||
// }
|
||||
// return new Promise((resolve) => {
|
||||
// if (!response) { resolve({ error: 'No response from server' }); }
|
||||
// response.json()
|
||||
// .catch(error => resolve({ error: `Failed to process response, likely due to Netlify's 10-sec limit on lambda functions. Error: ${error}`}));
|
||||
// });
|
||||
// };
|
||||
|
||||
|
||||
useEffect(() => {
|
||||
if (!addressType || addressType === 'empt') {
|
||||
@ -312,16 +285,6 @@ const Results = (): JSX.Element => {
|
||||
.then(res => applyWhoIsResults(res)),
|
||||
});
|
||||
|
||||
// Fetch and parse built-with results
|
||||
// const [technologyResults, updateTechnologyResults] = useMotherHook<TechnologyGroup[]>({
|
||||
// jobId: 'built-with',
|
||||
// updateLoadingJobs,
|
||||
// addressInfo: { address, addressType, expectedAddressTypes: urlTypeOnly },
|
||||
// fetchRequest: () => fetch(`https://api.builtwith.com/v21/api.json?KEY=${keys.builtWith}&LOOKUP=${address}`)
|
||||
// .then(res => parseJson(res))
|
||||
// .then(res => makeTechnologies(res)),
|
||||
// });
|
||||
|
||||
// Fetches DNS TXT records
|
||||
const [txtRecordResults, updateTxtRecordResults] = useMotherHook({
|
||||
jobId: 'txt-records',
|
||||
@ -378,6 +341,14 @@ const Results = (): JSX.Element => {
|
||||
fetchRequest: () => fetch(`/check-hsts?url=${address}`).then(res => parseJson(res)),
|
||||
});
|
||||
|
||||
// Get a website's listed pages from its sitemap
|
||||
const [sitemapResults, updateSitemapResults] = useMotherHook({
|
||||
jobId: 'sitemap',
|
||||
updateLoadingJobs,
|
||||
addressInfo: { address, addressType, expectedAddressTypes: urlTypeOnly },
|
||||
fetchRequest: () => fetch(`/sitemap?url=${address}`).then(res => parseJson(res)),
|
||||
});
|
||||
|
||||
// Get site features from BuiltWith
|
||||
const [siteFeaturesResults, updateSiteFeaturesResults] = useMotherHook({
|
||||
jobId: 'features',
|
||||
@ -449,12 +420,13 @@ const Results = (): JSX.Element => {
|
||||
{ id: 'dns', title: 'DNS Records', result: dnsResults, Component: DnsRecordsCard, refresh: updateDnsResults },
|
||||
{ id: 'hosts', title: 'Host Names', result: shoadnResults?.hostnames, Component: HostNamesCard, refresh: updateShodanResults },
|
||||
{ id: 'tech-stack', title: 'Tech Stack', result: techStackResults, Component: TechStackCard, refresh: updateTechStackResults },
|
||||
{ id: 'lighthouse', title: 'Performance', result: lighthouseResults, Component: LighthouseCard, refresh: updateLighthouseResults },
|
||||
{ id: 'quality', title: 'Quality Summary', result: lighthouseResults, Component: LighthouseCard, refresh: updateLighthouseResults },
|
||||
{ id: 'cookies', title: 'Cookies', result: cookieResults, Component: CookiesCard, refresh: updateCookieResults },
|
||||
{ id: 'trace-route', title: 'Trace Route', result: traceRouteResults, Component: TraceRouteCard, refresh: updateTraceRouteResults },
|
||||
{ id: 'server-info', title: 'Server Info', result: shoadnResults?.serverInfo, Component: ServerInfoCard, refresh: updateShodanResults },
|
||||
{ id: 'redirects', title: 'Redirects', result: redirectResults, Component: RedirectsCard, refresh: updateRedirectResults },
|
||||
{ id: 'robots-txt', title: 'Crawl Rules', result: robotsTxtResults, Component: RobotsTxtCard, refresh: updateRobotsTxtResults },
|
||||
{ id: 'sitemap', title: 'Pages', result: sitemapResults, Component: SitemapCard, refresh: updateSitemapResults },
|
||||
{ id: 'dnssec', title: 'DNSSEC', result: dnsSecResults, Component: DnsSecCard, refresh: updateDnsSecResults },
|
||||
{ id: 'status', title: 'Server Status', result: serverStatusResults, Component: ServerStatusCard, refresh: updateServerStatusResults },
|
||||
{ id: 'ports', title: 'Open Ports', result: portsResults, Component: OpenPortsCard, refresh: updatePortsResults },
|
||||
@ -526,8 +498,8 @@ const Results = (): JSX.Element => {
|
||||
|
||||
<Masonry
|
||||
breakpointCols={{ 10000: 12, 4000: 9, 3600: 8, 3200: 7, 2800: 6, 2400: 5, 2000: 4, 1600: 3, 1200: 2, 800: 1 }}
|
||||
className="my-masonry-grid"
|
||||
columnClassName="my-masonry-grid_column">
|
||||
className="masonry-grid"
|
||||
columnClassName="masonry-grid-col">
|
||||
{
|
||||
resultCardData.map(({ id, title, result, refresh, Component }, index: number) => (
|
||||
(result && !result.error) ? (
|
||||
|
15
yarn.lock
15
yarn.lock
@ -8699,7 +8699,7 @@ sass-loader@^12.3.0:
|
||||
klona "^2.0.4"
|
||||
neo-async "^2.6.2"
|
||||
|
||||
sax@~1.2.4:
|
||||
sax@>=0.6.0, sax@~1.2.4:
|
||||
version "1.2.4"
|
||||
resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9"
|
||||
integrity sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==
|
||||
@ -10552,6 +10552,19 @@ xml-name-validator@^3.0.0:
|
||||
resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-3.0.0.tgz#6ae73e06de4d8c6e47f9fb181f78d648ad457c6a"
|
||||
integrity sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw==
|
||||
|
||||
xml2js@^0.6.0:
|
||||
version "0.6.0"
|
||||
resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.6.0.tgz#07afc447a97d2bd6507a1f76eeadddb09f7a8282"
|
||||
integrity sha512-eLTh0kA8uHceqesPqSE+VvO1CDDJWMwlQfB6LuN6T8w6MaDJ8Txm8P7s5cHD0miF0V+GGTZrDQfxPZQVsur33w==
|
||||
dependencies:
|
||||
sax ">=0.6.0"
|
||||
xmlbuilder "~11.0.0"
|
||||
|
||||
xmlbuilder@~11.0.0:
|
||||
version "11.0.1"
|
||||
resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-11.0.1.tgz#be9bae1c8a046e76b31127726347d0ad7002beb3"
|
||||
integrity sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==
|
||||
|
||||
xmlchars@^2.2.0:
|
||||
version "2.2.0"
|
||||
resolved "https://registry.yarnpkg.com/xmlchars/-/xmlchars-2.2.0.tgz#060fe1bcb7f9c76fe2a17db86a9bc3ab894210cb"
|
||||
|
Loading…
Reference in New Issue
Block a user