mirror of
https://github.com/Lissy93/web-check.git
synced 2024-11-22 16:23:56 +01:00
Adds API endpoint to fetch data from Wayback Machine
This commit is contained in:
parent
8263b9b7fd
commit
d63f891667
83
api/archives.js
Normal file
83
api/archives.js
Normal file
@ -0,0 +1,83 @@
|
||||
const axios = require('axios');
|
||||
const middleware = require('./_common/middleware');
|
||||
|
||||
/**
 * Converts a Wayback Machine capture timestamp (`YYYYMMDDhhmmss`) to a Date.
 *
 * Wayback timestamps are expressed in UTC, so the components are interpreted
 * as UTC rather than in the server's local time zone; otherwise the resulting
 * instant would depend on where the code happens to run.
 *
 * @param {string} timestamp - 14-digit capture timestamp.
 * @returns {Date} The capture instant (an Invalid Date if a field is not numeric).
 */
const convertTimestampToDate = (timestamp) => {
  const [year, month, day, hour, minute, second] = [
    timestamp.slice(0, 4),
    timestamp.slice(4, 6),
    timestamp.slice(6, 8),
    timestamp.slice(8, 10),
    timestamp.slice(10, 12),
    timestamp.slice(12, 14),
  ].map((part) => parseInt(part, 10));

  // Date months are zero-based, the timestamp's are one-based.
  return new Date(Date.UTC(year, month - 1, day, hour, minute, second));
};
|
||||
|
||||
/**
 * Counts how many times the digest (index 2 of each row) differs from the
 * digest of the row before it.
 *
 * The tally starts at -1 so that the first row, which always differs from the
 * initial `null` digest, is not counted as a change. An empty list therefore
 * yields -1.
 *
 * @param {Array<Array>} results - Capture rows, in order.
 * @returns {number} Number of digest transitions.
 */
const countPageChanges = (results) => {
  let changes = -1;
  let lastDigest = null;

  for (const capture of results) {
    const digest = capture[2];
    if (digest === lastDigest) {
      continue;
    }
    lastDigest = digest;
    changes += 1;
  }

  return changes;
};
|
||||
|
||||
/**
 * Computes the mean of the size values stored at index 3 of each row,
 * rounded to the nearest integer.
 *
 * Each size is parsed as a base-10 integer. The result is NaN when the list
 * is empty or when any size cannot be parsed.
 *
 * @param {Array<Array>} scans - Capture rows.
 * @returns {number} Rounded average size.
 */
const getAveragePageSize = (scans) => {
  let sizeTotal = 0;

  for (const scan of scans) {
    sizeTotal += parseInt(scan[3], 10);
  }

  return Math.round(sizeTotal / scans.length);
};
|
||||
|
||||
/**
 * Derives scan/change frequency figures for the period between two captures.
 *
 * All values are rounded to two decimals. The per-day rates are computed from
 * the unrounded number of elapsed days; dividing by the (rounded) interval
 * between scans instead would not yield a per-day figure.
 *
 * Note: when the period is zero days long or `changeCount` is 0, the affected
 * ratios are non-finite (NaN / Infinity), as no meaningful rate exists.
 *
 * @param {Date|number} firstScan - Instant of the first capture.
 * @param {Date|number} lastScan - Instant of the last capture.
 * @param {number} totalScans - Number of captures in the period.
 * @param {number} changeCount - Number of content changes in the period.
 * @returns {{daysBetweenScans: number, daysBetweenChanges: number,
 *   scansPerDay: number, changesPerDay: number}}
 */
const getScanFrequency = (firstScan, lastScan, totalScans, changeCount) => {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const formatToTwoDecimal = (num) => parseFloat(num.toFixed(2));

  // Length of the observed period, in (fractional) days.
  const dayFactor = (lastScan - firstScan) / MS_PER_DAY;

  const daysBetweenScans = formatToTwoDecimal(dayFactor / totalScans);
  const daysBetweenChanges = formatToTwoDecimal(dayFactor / changeCount);
  // N captures span N - 1 intervals, hence totalScans - 1.
  const scansPerDay = formatToTwoDecimal((totalScans - 1) / dayFactor);
  const changesPerDay = formatToTwoDecimal(changeCount / dayFactor);

  return {
    daysBetweenScans,
    daysBetweenChanges,
    scansPerDay,
    changesPerDay,
  };
};
|
||||
|
||||
/**
 * Fetches the capture history of a URL from the Wayback Machine CDX API and
 * summarises it.
 *
 * @param {string} url - The site URL to look up.
 * @returns {Promise<Object>} One of:
 *   - a summary object (`firstScan`, `lastScan`, `totalScans`, `changeCount`,
 *     `averagePageSize`, `scanFrequency`, `scans`, `scanUrl`);
 *   - `{ skipped }` when the response holds no capture rows;
 *   - `{ error }` when the request or the processing throws.
 */
const getWaybackData = async (url) => {
  // Encode the target so characters such as `&`, `#` or `?` inside it cannot
  // truncate the value or inject additional CDX query parameters.
  const cdxUrl = `https://web.archive.org/cdx/search/cdx?url=${encodeURIComponent(url)}&output=json&fl=timestamp,statuscode,digest,length,offset`;

  try {
    const { data } = await axios.get(cdxUrl);

    // Check there's data (the first row is only the header)
    if (!data || !Array.isArray(data) || data.length <= 1) {
      return { skipped: 'Site has never before been archived via the Wayback Machine' };
    }

    // Drop the header row without mutating the response payload.
    // Each remaining row is [timestamp, statuscode, digest, length, offset].
    const scans = data.slice(1);

    // Process and return the results
    const firstScan = convertTimestampToDate(scans[0][0]);
    const lastScan = convertTimestampToDate(scans[scans.length - 1][0]);
    const totalScans = scans.length;
    const changeCount = countPageChanges(scans);

    return {
      firstScan,
      lastScan,
      totalScans,
      changeCount,
      averagePageSize: getAveragePageSize(scans),
      scanFrequency: getScanFrequency(firstScan, lastScan, totalScans, changeCount),
      scans,
      scanUrl: url,
    };
  } catch (err) {
    return { error: `Error fetching Wayback data: ${err.message}` };
  }
};
|
||||
|
||||
// Export the Wayback lookup, wrapped by the shared middleware, as `handler`.
exports.handler = middleware(getWaybackData);
|
Loading…
Reference in New Issue
Block a user