/** Substrings that make `excludeLink` reject a link when they occur anywhere inside it. */
const excludedSnippets: string[] = [
  // Sign-up / sign-in paths
  "/signup",
  "/signin",
  "/login",

  // LinkedIn sections
  "linkedin.com/jobs",
  "linkedin.com/uas",
  "linkedin.com/legal",
  "linkedin.com/learning",
  "linkedin.com/hubs",
  "linkedin.com/directory",
  "linkedin.com/games",
  "linkedin.com/products",
  "linkedin.com/pulse",

  // Other hosts
  "chrome.google.com",
  "about.crunchbase.com",
  "latka.com",
  "cbinsights.com",
  "founderled.com",

  // Policy / legal style paths
  "/policies",
  "/terms",
  "/privacy",
  "/accessibility",

  // Twitter intent links
  "twitter.com/intent",
];

/** A link beginning with any of these is rejected by `excludeLink`. */
const excludePrefixes: string[] = ["#", "/buy"];

/** A link ending with any of these is rejected by `excludeLink`. */
const excludeSuffixes: string[] = ["news.crunchbase.com/", "/crunchbase"];

/**
 * Decide whether a link should be dropped.
 *
 * @param link - The link text to test.
 * @returns `true` when the link contains an entry of `excludedSnippets`, starts with an
 * entry of `excludePrefixes`, or ends with an entry of `excludeSuffixes`; otherwise `false`.
 */
export function excludeLink(link: string) {
  for (const fragment of excludedSnippets) {
    if (link.includes(fragment)) {
      return true;
    }
  }

  for (const head of excludePrefixes) {
    if (link.startsWith(head)) {
      return true;
    }
  }

  for (const tail of excludeSuffixes) {
    if (link.endsWith(tail)) {
      return true;
    }
  }

  return false;
}

/**
 * Clean a URL for the purpose of deduplication.
 *
 * Steps, in order: percent-decode the link, strip tracking query parameters (`cta_*`,
 * `utm_*`, `trk`, and any parameter whose name contains `refer`), drop the `#fragment`,
 * unwrap `redirect?url=<target>` links, and optionally strip a leading `www.` from the host.
 *
 * @param link - Raw link text.
 * @param replaceWWW - When truthy, remove a `www.` that directly follows the scheme (or
 * starts the link when no scheme is present).
 * @returns The cleaned link, or `null` when nothing is left after cleaning, when
 * `excludeLink` rejects the result, or when the result contains a `:` but starts with
 * neither `http` nor `mailto:`.
 */
export function sanitizeLink(link: string, replaceWWW?: boolean): string | null {
  // decodeURIComponent throws URIError on malformed escapes (e.g. a bare "%");
  // keep the undecoded text in that case instead of crashing the caller.
  const safeDecode = (value: string): string => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  let sanitizedLink = safeDecode(link);

  // Parameter names are matched with [^&=] so that one match can never swallow
  // neighbouring, unrelated parameters.
  // 1) Tracking parameters that follow "&": drop them together with their separator.
  sanitizedLink = sanitizedLink.replace(
    /&(?:cta_[^&=]+|utm_[^&=]+|[^&=]*refer[^&=]*|trk)=[^&]+/g,
    "",
  );
  // 2) Tracking parameters that follow "?": keep the "?" only if another parameter
  //    comes next, so the remaining query string stays well-formed.
  sanitizedLink = sanitizedLink.replace(
    /\?(?:cta_[^&=]+|utm_[^&=]+|[^&=]*refer[^&=]*|trk)=[^&]+(&?)/g,
    (_match, nextSeparator: string) => (nextSeparator ? "?" : ""),
  );

  // Remove the #fragment.
  sanitizedLink = sanitizedLink.replace(/#.*/, "");

  // Unwrap redirect links: everything after "redirect?url=" is the real target.
  const redirectTarget = sanitizedLink.match(/redirect\?url=(.+)/)?.[1];
  if (redirectTarget !== undefined) {
    sanitizedLink = safeDecode(redirectTarget);
  }

  // Pure "#anchor" links (and links that consisted only of tracking parameters) are
  // empty by now; report them as excluded rather than returning an empty string.
  if (sanitizedLink === "") {
    return null;
  }

  if (excludeLink(sanitizedLink)) {
    return null;
  }

  // Anything with a ":" that is not http(s) or mailto is treated as an unsupported scheme.
  if (
    sanitizedLink.includes(":") &&
    !sanitizedLink.startsWith("http") &&
    !sanitizedLink.startsWith("mailto:")
  ) {
    return null;
  }

  if (replaceWWW) {
    // Anchor to the host position so "www." inside a path or query value is untouched.
    sanitizedLink = sanitizedLink.replace(/^((?:https?:)?\/\/)?www\./, "$1");
  }

  return sanitizedLink;
}

/**
 * Sanitize every link with `sanitizeLink` and drop duplicates.
 *
 * Two links count as duplicates when they are equal after sanitizing, ignoring a `www.`
 * that directly follows the scheme (or starts the link). The first occurrence wins,
 * regardless of whether the `www.` or the bare variant appears first in the input.
 *
 * @param links - Raw links to clean.
 * @returns The unique sanitized links in first-seen order; links for which `sanitizeLink`
 * returns `null` or an empty string are omitted.
 */
export function sanitizeLinks(links: string[]): string[] {
  const seenKeys = new Set<string>();
  const uniqueSanitizedLinks: string[] = [];

  for (const link of links) {
    const sanitizedLink = sanitizeLink(link);
    if (!sanitizedLink) {
      continue;
    }

    // Compare on a www-less key so that the outcome does not depend on input order.
    const dedupeKey = sanitizedLink.replace(/^((?:https?:)?\/\/)?www\./, "$1");
    if (seenKeys.has(dedupeKey)) {
      continue;
    }

    seenKeys.add(dedupeKey);
    uniqueSanitizedLinks.push(sanitizedLink);
  }

  return uniqueSanitizedLinks;
}

/**
 * Reduce a URL to `scheme://host/path`, dropping the query string, the fragment and a
 * leading `www.` on the host.
 *
 * @param url - URL text; `https://` is prepended when it contains no `://`.
 * @returns The normalized URL. If the text cannot be parsed by `URL`, the input with
 * everything up to and including each `://` removed is returned instead.
 */
export function normalizeUrl(url: string): string {
  const withScheme = url.includes("://") ? url : "https://" + url;

  try {
    const parsed = new URL(withScheme);
    // `origin` starts with the scheme, so "www." can only be stripped from `host`.
    const host = parsed.host.replace(/^www\./, "");
    return `${parsed.protocol}//${host}${parsed.pathname}`;
  } catch {
    return withScheme.replace(/.*?:\/\//g, "");
  }
}

/**
 * Turn a root-relative or protocol-relative URL into an absolute one using `parent`.
 *
 * @param url - URL text as found in the page.
 * @param parent - URL of the page the link was found on.
 * @returns `parent.protocol + url` for protocol-relative links (`//host/path`),
 * `parent.origin + url` for root-relative links (`/path`), and `url` unchanged otherwise.
 */
export function fixRelativeUrl(url: string, parent: URL): string {
  // "//host/path" already names its own host; it only inherits the parent's scheme.
  if (url.startsWith("//")) {
    return parent.protocol + url;
  }

  return url.startsWith("/") ? parent.origin + url : url;
}
