import axios from 'axios';
import cheerio from 'cheerio';
import urlLib from 'url';
import middleware from './_common/middleware.js';

/**
 * Fetches a page and lists the distinct pages it links to, split into
 * links on the same host as the page ("internal") and links to any other
 * host ("external"). Each list is ordered by how often the link occurs on
 * the page, most frequent first.
 *
 * @param {string} url - Absolute URL of the page to fetch and inspect.
 * @returns {Promise<{internal: string[], external: string[]}
 *   | {statusCode: number, body: {skipped: string}}>} The two link lists, or
 *   a "skipped" response when the fetched HTML contains no usable links.
 * @throws Propagates any error raised by `axios.get`, and a `TypeError` from
 *   `new URL(url)` if `url` is not a valid absolute URL.
 */
const linkedPagesHandler = async (url) => {
  const response = await axios.get(url);
  const $ = cheerio.load(response.data);
  const pageUrl = new URL(url);
  const internalLinksMap = new Map();
  const externalLinksMap = new Map();

  // Count every occurrence of every navigable link on the page
  $('a[href]').each((i, link) => {
    const href = $(link).attr('href');

    let target;
    try {
      target = new URL(href, pageUrl);
    } catch {
      // Malformed href that cannot be resolved against the page URL: ignore it
      return;
    }

    // Skip mailto:, tel:, javascript: and other non-web schemes
    if (target.protocol !== 'http:' && target.protocol !== 'https:') {
      return;
    }

    // Classify by host rather than by string prefix, so that sibling paths
    // (e.g. "/about" seen from "/blog/post") count as internal, and
    // look-alike hosts (e.g. "example.com.evil.test") do not.
    const linksMap = target.host === pageUrl.host ? internalLinksMap : externalLinksMap;
    linksMap.set(target.href, (linksMap.get(target.href) ?? 0) + 1);
  });

  // Sort by most occurrences, remove duplicates, and convert to array
  const toSortedLinks = (linksMap) => [...linksMap.entries()]
    .sort(([, countA], [, countB]) => countB - countA)
    .map(([link]) => link);
  const internalLinks = toSortedLinks(internalLinksMap);
  const externalLinks = toSortedLinks(externalLinksMap);

  // If there were no links, then mark as skipped and show reasons
  if (internalLinks.length === 0 && externalLinks.length === 0) {
    return {
      statusCode: 400,
      body: {
        skipped: 'No internal or external links found. '
          + 'This may be due to the website being dynamically rendered, using a client-side framework (like React), and without SSR enabled. '
          + 'That would mean that the static HTML returned from the HTTP request doesn\'t contain any meaningful content for Web-Check to analyze. '
          + 'You can rectify this by using a headless browser to render the page instead.',
      },
    };
  }

  return { internal: internalLinks, external: externalLinks };
};

// Wrap the link extractor with the shared middleware and expose the result
// both as the named export `handler` and as the module's default export.
const handler = middleware(linkedPagesHandler);
export { handler, handler as default };