import { Cheerio, CheerioAPI, Element, load } from "cheerio";
import { uniq } from "lodash";

import { htmlElemToText } from "@/crawler/htmlToText";
import { loggerWithPrefix } from "@/lib/logger";
import { Education, LinkedinProfile, WorkExperience } from "@/types";
import { objToString } from "@/utils/entityUtils";

// Module-level logger; created with the "[linkedinParser]" prefix argument.
const logger = loggerWithPrefix("[linkedinParser]");

/**
 * Extracts structured data from the HTML of a LinkedIn profile page.
 *
 * The HTML is loaded into Cheerio once, in the constructor. `parse` walks
 * every `<section>` inside `<main>` and dispatches on the section's anchor id;
 * `parseExperiencePage` and `parseEducationPage` run only the experience /
 * education extraction over the sections of the loaded page.
 *
 * Several parsing steps mutate the loaded document: `li-icon` elements are
 * replaced with text, and header / hidden / button nodes are removed from
 * the generic list sections.
 */
export class LinkedInProfileParser {
  $: CheerioAPI;

  /**
   * @param text Raw HTML of the page to parse.
   */
  constructor(public text: string) {
    this.$ = load(text);
  }

  /** Creates an empty profile skeleton that the section parsers fill in. */
  createProfile() {
    return {
      profile: { name: "" },
      experience: [],
      education: [],
      recentActivity: [],
    } as LinkedinProfile;
  }

  /**
   * Parses the whole profile page.
   *
   * @param url Optional URL of the profile; stored as `profile.profile.url`.
   * @returns The populated profile.
   * @throws Error("Invalid profile") when no profile name could be extracted.
   */
  parse(url?: string): LinkedinProfile {
    const { $ } = this;

    const profile = this.createProfile();
    profile.profile.url = url;

    $("main")
      .find("section")
      .each((_i, section) => {
        const sectionHtml = $(section);
        const anchorId = sectionHtml.find(".pv-profile-card__anchor").attr("id");

        switch (anchorId) {
          case undefined:
            // Sections without an anchor id are tried as the top profile card.
            this.parseProfileCard(sectionHtml, profile);
            break;
          case "about":
            this.parseAbout(sectionHtml, profile);
            break;
          case "highlights":
            this.parseHighlights(sectionHtml, profile);
            break;
          case "content_collections":
            this.parseRecentActivity(sectionHtml, profile);
            break;
          case "experience":
            this.parseExperience(sectionHtml, profile);
            break;
          case "education":
            this.parseEducation(sectionHtml, profile);
            break;
          case "patents":
            this.parsePatents(sectionHtml, profile);
            break;
          case "projects":
            this.parseProjects(sectionHtml, profile);
            break;
          case "skills":
            this.parseSkills(sectionHtml, profile);
            break;
          case "languages":
            this.parseLanguages(sectionHtml, profile);
            break;
          case "interests":
            this.parseInterests(sectionHtml, profile);
            break;
          case "volunteering_experience":
            this.parseVolunteering(sectionHtml, profile);
            break;
          default:
            logger.debug("unsupported section", anchorId);
        }
      });

    if (!profile.profile.name) throw new Error("Invalid profile");

    return profile;
  }

  /**
   * Parses a page that lists work experience only.
   *
   * @returns The experience entries found in the page's `<main>` sections.
   */
  parseExperiencePage(): WorkExperience[] {
    const profile = this.createProfile();
    this.parseExperience(this.$("main").find("section"), profile);
    return profile.experience;
  }

  /**
   * Parses a page that lists education only.
   *
   * @returns The education entries found in the page's `<main>` sections.
   */
  parseEducationPage(): Education[] {
    const profile = this.createProfile();
    this.parseEducation(this.$("main").find("section"), profile);
    return profile.education;
  }

  /**
   * Extracts cover and profile image plus the basics: name, title, location,
   * followers, connections, mutual connections and connection degree.
   *
   * Does nothing when a name has already been extracted from an earlier
   * section. Replaces `profile.profile`, carrying over the previously set `url`.
   */
  private parseProfileCard(section: Cheerio<Element>, profile: LinkedinProfile) {
    const { $ } = this;

    // if we already saw a profile name, we can skip this section
    if (profile.profile.name) return;

    // Cover Image
    const coverImageUrl = section.find(".profile-background-image__image").attr("src");

    // Profile Image
    let profileImageUrl = section
      .find(
        ".pv-top-card-profile-picture__image, .pv-top-card-profile-picture__image--show, .profile-photo-edit__preview",
      )
      .attr("src");
    // Drop inline data URIs and images served from static.licdn.com
    // (presumably LinkedIn's generic placeholder avatar -- TODO confirm).
    if (
      !profileImageUrl ||
      profileImageUrl.startsWith("data:") ||
      profileImageUrl.includes("/static.licdn.com/")
    ) {
      profileImageUrl = undefined;
    }

    // Name
    let name = section.find("h1").text().trim();
    // get rid of titles in linkedin names ("Jane Doe, PhD" -> "Jane Doe")
    if (name.includes(",")) name = (name.split(",")[0] ?? "").trim();

    // Title (Position and Company)
    const title = section.find(".text-body-medium").first().text().trim();

    // Location
    const locationContainer = section.find('a[href*="/contact-info"]').closest("div");
    const location = locationContainer.find("span").first().text().trim();

    // Connections / followers: the first word of the matching list item is the count
    let connections: string | undefined;
    let followers: string | undefined;
    section
      .find("> div > ul")
      .first()
      .children("li")
      .each((_i, element) => {
        const itemText = $(element).text().trim();
        if (itemText.includes("followers")) {
          followers = itemText.split(" ")[0];
        } else if (itemText.includes("connections")) {
          connections = itemText.split(" ")[0];
        }
      });

    // Mutual Connections
    const mutuals = section
      .find('a[href*="/search/results/people/?facetNetwork="] .visually-hidden')
      .first()
      .text()
      .trim();

    // Degree of Connection
    const degree = section.find(".dist-value").text().trim();

    // Assign extracted data to the profile object. The url was set by the
    // caller before this section was parsed, so it must be carried over.
    profile.profile = {
      url: profile.profile.url,
      coverImage: coverImageUrl,
      profileImage: profileImageUrl,
      name,
      title,
      location,
      followers,
      connections,
      mutuals: mutuals.length ? mutuals : undefined,
      degree,
    };
  }

  /** Stores the text of the "about" section in `profile.about`. */
  private parseAbout(section: Cheerio<Element>, profile: LinkedinProfile) {
    profile.about = this.hiddenText(section);
  }

  /** Stores one trimmed string per matched highlight node in `profile.highlights`. */
  private parseHighlights(section: Cheerio<Element>, profile: LinkedinProfile) {
    profile.highlights = section
      .find('.t-normal [aria-hidden="true"]')
      .map((_i, e) => this.$(e).text().trim())
      .get();
  }

  /**
   * Extracts recent activity items (url, title, date, image, text) from the
   * list items of the section. Items without any `a.app-aware-link` are skipped.
   */
  private parseRecentActivity(section: Cheerio<Element>, profile: LinkedinProfile) {
    const { $ } = this;

    const activities: typeof profile.recentActivity = [];
    section.find("ul li").each((_i, activity) => {
      const allLinks = $(activity).find("a.app-aware-link");
      if (!allLinks.length) return;

      // The first link is the header; it carries the title and the date.
      const headerLink = allLinks.eq(0);
      const title = headerLink
        .find("[aria-hidden=true]")
        .first()
        .text()
        .trim()
        .replace(/\s+/g, " ");
      const date = headerLink.find(".visually-hidden").text().trim();

      // The remaining links make up the body; their aria-labels hold the text.
      const bodyLinks = allLinks.slice(1);
      const image = bodyLinks.find("img").first().attr("src");
      const ariaLabels = bodyLinks
        .map((_j, el) => $(el).attr("aria-label"))
        .get()
        .map((label) =>
          label
            .replace(/View full post(. )?/, "")
            .replace(/^Image$/, "")
            .replace(/\s+/g, " ")
            .trim(),
        )
        .filter(Boolean);

      activities.push({
        url: headerLink.attr("href")!,
        title,
        date,
        image,
        text: uniq(ariaLabels).join("\n"),
      });
    });
    profile.recentActivity = activities;
  }

  /**
   * Extracts work experience from the first `<ul>` of the section.
   *
   * Each top-level list item becomes one entry. An item with more than one
   * nested entity is read as a company with several positions; otherwise it
   * is read as a single position. Entries without a company are dropped.
   * Also records the text of the "show more" link following the list, if any,
   * in `profile.hasMoreExperience`.
   */
  private parseExperience(section: Cheerio<Element>, profile: LinkedinProfile) {
    const { $ } = this;
    const experienceList = section.find("ul").first();
    const experiences = experienceList.children("li");

    // Replace the icon element with plain text so it shows up in extracted text.
    section.find("li-icon").replaceWith("LinkedIn");

    profile.experience = experiences
      .map((i, el) => {
        const { url, logo, dataColumn, dataColumnChildren } = this.locateEntryColumns(el);
        const positionItems = dataColumn.find(
          ".pvs-entity__sub-components > ul > li [data-view-name=profile-component-entity]",
        );

        // An entry is treated as a group of positions when more than one
        // nested entity is present.
        const multiplePositions = positionItems.length > 1;
        const firstLabel = this.hiddenText(dataColumnChildren.eq(0));

        logger.debug(
          "multiplePositions",
          i,
          firstLabel,
          multiplePositions,
          positionItems.length,
          positionItems.length,
          dataColumnChildren.length,
        );

        if (!multiplePositions) {
          // Header rows: title, company, dates, location.
          const title = firstLabel || undefined;
          const company = this.hiddenText(dataColumnChildren.eq(1)) || undefined;
          const dates = this.hiddenText(dataColumnChildren.eq(2)) || undefined;
          const location = this.hiddenText(dataColumnChildren.eq(3)) || undefined;

          const descriptionContainer = dataColumn.find("ul [aria-hidden=true]");
          const description = htmlElemToText($, descriptionContainer, false, true) || undefined;

          return {
            title,
            logo,
            company,
            url,
            location,
            positions: [{ title, dates, location, description }],
          } as LinkedinProfile["experience"][number];
        }

        // Header rows: company, total duration, location.
        const company = firstLabel;
        const totalDuration = this.hiddenText(dataColumnChildren.eq(1));
        const location = this.hiddenText(dataColumnChildren.eq(2));

        const positions = positionItems
          .map((_j, positionEl) => {
            const positionElement = $(positionEl);
            const labels = positionElement
              .find("a.flex-column")
              .first()
              .children()
              .map((_k, child) => $(child).find('[aria-hidden="true"]').first().text().trim())
              .get();

            // possible configurations:
            // title, date
            // title, type, date
            // title, date, location
            // title, type, date, location
            const title = labels.shift();
            let type: string | undefined;
            let dates: string | undefined;
            // A label containing "·" is taken to be the date row; otherwise
            // the next label is the employment type, followed by the dates.
            if (labels[0]?.includes("·")) {
              dates = labels.shift();
            } else {
              type = labels.shift();
              dates = labels.shift();
            }
            const location = labels.shift();

            const descriptionContainer = positionElement.find("ul [aria-hidden=true]");
            const description = htmlElemToText($, descriptionContainer, false, true) || undefined;

            return { title, type, dates, location, description };
          })
          .get();

        return {
          logo,
          company,
          url,
          totalDuration,
          location,
          positions,
        } as LinkedinProfile["experience"][number];
      })
      .get();

    profile.experience = profile.experience.filter((e) => e.company);

    const hasMore = experienceList.next().find("a").first();
    profile.hasMoreExperience = hasMore.length ? hasMore.text().trim() : undefined;
  }

  /**
   * Extracts education entries from the first `<ul>` of the section.
   *
   * Entries without an institution are dropped. Also records the text of the
   * "show more" link following the list, if any, in `profile.hasMoreEducation`.
   */
  private parseEducation(section: Cheerio<Element>, profile: LinkedinProfile) {
    const { $ } = this;
    const educationList = section.find("ul").first();
    const educations = educationList.children("li");

    profile.education = educations
      .map((_i, el) => {
        const { url, logo, dataColumn, dataColumnChildren } = this.locateEntryColumns(el);

        // Header rows: institution, degree, dates, location.
        const institution = this.hiddenText(dataColumnChildren.eq(0));
        const degree = this.hiddenText(dataColumnChildren.eq(1)) || undefined;
        const dates = this.hiddenText(dataColumnChildren.eq(2)) || undefined;
        const location = this.hiddenText(dataColumnChildren.eq(3)) || undefined;
        const description =
          htmlElemToText($, dataColumn.find("ul [aria-hidden=true]").first(), false, true) ||
          undefined;

        return {
          logo,
          institution,
          url,
          degree,
          dates,
          location,
          description,
        } as LinkedinProfile["education"][number];
      })
      .get();

    profile.education = profile.education.filter((e) => e.institution);

    const hasMore = educationList.next().find("a").first();
    profile.hasMoreEducation = hasMore.length ? hasMore.text().trim() : undefined;
  }

  /** Stores the list items of the patents section in `profile.patents`. */
  private parsePatents(section: Cheerio<Element>, profile: LinkedinProfile) {
    profile.patents = this.extractTextHelper(section);
  }

  /** Stores the list items of the projects section in `profile.projects`. */
  private parseProjects(section: Cheerio<Element>, profile: LinkedinProfile) {
    profile.projects = this.extractTextHelper(section);
  }

  /** Stores the list items of the skills section in `profile.skills`. */
  private parseSkills(section: Cheerio<Element>, profile: LinkedinProfile) {
    profile.skills = this.extractTextHelper(section);
  }

  /** Stores the list items of the languages section in `profile.languages`. */
  private parseLanguages(section: Cheerio<Element>, profile: LinkedinProfile) {
    profile.languages = this.extractTextHelper(section);
  }

  /** Stores the list items of the interests section in `profile.interests`. */
  private parseInterests(section: Cheerio<Element>, profile: LinkedinProfile) {
    profile.interests = this.extractTextHelper(section);
  }

  /** Stores the list items of the volunteering section in `profile.volunteering`. */
  private parseVolunteering(section: Cheerio<Element>, profile: LinkedinProfile) {
    profile.volunteering = this.extractTextHelper(section);
  }

  /**
   * Generic extractor for simple list sections.
   *
   * Mutates `section`: the header title, every `[aria-hidden=true]` node and
   * every button are removed before text is read.
   *
   * @returns One string per child of the first `<ul>` (empty results are
   *   dropped), or a single-element array with the whitespace-collapsed
   *   section text when the section has no list.
   */
  private extractTextHelper(section: Cheerio<Element>): string[] {
    const { $ } = this;

    section.find(".pvs-header__title").remove();
    section.find("[aria-hidden=true]").remove();
    section.find("button").remove();

    const topLevelList = section.find("ul").first();
    // Global flag: collapse every whitespace run, not just the first one.
    if (!topLevelList.length) return [section.text().replace(/\s+/g, " ").trim()];

    return topLevelList
      .children()
      .map((_i, el) => htmlElemToText($, $(el), false, true))
      .get()
      .filter(Boolean);
  }

  /** Returns the trimmed text of all `[aria-hidden="true"]` descendants of `.t-normal` nodes. */
  private hiddenText(node: Cheerio<Element>): string;
  private hiddenText(node: Cheerio<Element>): string {
    return node.find('.t-normal [aria-hidden="true"]').length && node.is("section")
      ? node.find('.t-normal [aria-hidden="true"]').text().trim()
      : node.find('[aria-hidden="true"]').text().trim();
  }

  /**
   * Locates the logo link and the data column of one experience/education
   * list item.
   *
   * Descends through single-child wrappers until an element with several
   * children is reached; its first child is the logo column and the element
   * following the logo's closest `div` is the data column.
   */
  private locateEntryColumns(el: Element) {
    const { $ } = this;
    let element = $(el);

    // descend into first child with multiple children
    while (element.children().length === 1) {
      element = element.children().first();
    }

    const leftColumn = element.children().first();
    let logoElement = leftColumn.find("a").first();
    // No link around the logo: fall back to the column itself.
    if (logoElement.length === 0) {
      logoElement = leftColumn;
    }

    const url = logoElement.attr("href") || undefined;
    const logo = logoElement.find("img").attr("src") || undefined;

    const dataColumn = logoElement.closest("div").next();
    const dataColumnChildren = dataColumn.find(".flex-column").first().children();

    return { url, logo, dataColumn, dataColumnChildren };
  }
}

/**
 * Renders a parsed LinkedIn profile as a compact plain-text summary.
 *
 * Sections are emitted in this order, each only when it has content:
 * title, location, approximate age, about, work experience, education,
 * skills, volunteering. Missing fields are skipped rather than rendered as
 * the text "undefined".
 *
 * @param profile The profile to summarize. When it has no `profile` field,
 *   the result of `objToString(profile)` is returned instead.
 * @returns The summary, with lines joined by "\n".
 */
export function linkedinProfileSummary(profile: LinkedinProfile) {
  if (!profile.profile) return objToString(profile);

  const parts: string[] = [];

  // Formats one bullet as "- <what> at <where> (<dates>)" plus an indented
  // description line; any missing piece is simply left out.
  const bullet = (
    what: string | undefined,
    where: string | undefined,
    dates: string | undefined,
    description: string | undefined,
  ): string => {
    const heading = [what, where].filter(Boolean).join(" at ");
    const line = `- ${heading}${dates ? ` (${dates})` : ""}`;
    return description ? `${line}\n  ${description}` : line;
  };

  if (profile.profile.title) parts.push(profile.profile.title);
  if (profile.profile.location) parts.push(profile.profile.location);
  if (profile.approxAge) parts.push("Approx Age: " + profile.approxAge);
  if (profile.about) parts.push("About: " + profile.about);

  const experience = profile.experience ?? [];
  if (experience.length || profile.hasMoreExperience) parts.push("\nWork Experience:");
  for (const exp of experience) {
    for (const pos of exp.positions ?? []) {
      parts.push(bullet(pos.title, exp.company, pos.dates, pos.description));
    }
  }
  if (profile.hasMoreExperience)
    parts.push(`- ${profile.hasMoreExperience.replace("Show all", "Total of")}`);

  const education = profile.education ?? [];
  if (education.length || profile.hasMoreEducation) parts.push("\nEducation:");
  for (const edu of education) {
    parts.push(bullet(edu.degree, edu.institution, edu.dates, edu.description));
  }
  if (profile.hasMoreEducation)
    parts.push(`- ${profile.hasMoreEducation.replace("Show all", "Total of")}`);

  if (profile.skills?.length) {
    parts.push("\nSkills:");
    // Only the first line of each entry is kept.
    profile.skills.forEach((skill) => {
      parts.push(`- ${skill.split("\n")[0]}`);
    });
  }

  if (profile.volunteering?.length) {
    parts.push("\nVolunteering:");
    profile.volunteering.forEach((volunteering) => {
      parts.push(`- ${volunteering.split("\n")[0]}`);
    });
  }

  return parts.join("\n");
}
