import { parse, ELEMENT_NODE, TEXT_NODE } from 'ultrahtml';
import { unescape } from 'he';

/**
 * A run of text in the document tree, optionally flagged as bold and/or
 * italic.
 */
export type IDocumentTextNode = {
  /** Discriminator identifying this node as text. */
  type: 'text';
  /** The text of the run. */
  content: string;
  /** Set to `true` for bold runs; left out otherwise. */
  isBold?: boolean;
  /** Set to `true` for italic runs; left out otherwise. */
  isItalic?: boolean;
};

/** A list and its entries. */
export type IDocumentListNode = {
  /** Discriminator identifying this node as a list. */
  type: 'list';
  /** The entries of the list, in source order. */
  items: IDocumentListItemNode[];
  /** `true` when the list was built from an `<ol>` element. */
  isOrdered?: boolean;
};

/**
 * A single entry of an `IDocumentListNode`. It only appears inside a list's
 * `items`; it is not a member of the `IChildNode` union.
 */
export type IDocumentListItemNode = {
  /** Discriminator identifying this node as a list entry. */
  type: 'list-item';
  /** Content of the entry (text runs and nested lists). */
  children: IChildNode[];
};

/**
 * A link wrapping child nodes.
 *
 * NOTE(review): this type is not part of the `IChildNode` union and nothing
 * in this file produces it — confirm whether it is consumed elsewhere.
 */
export type IDocumentLinkNode = {
  /** Discriminator identifying this node as a link. */
  type: 'link';
  /** Nodes contained in the link. */
  children: IChildNode[];
};

/** A table made of rows. */
export type DocumentTableNode = {
  /** Discriminator identifying this node as a table. */
  type: 'table';
  /** The rows of the table, in source order. */
  rows: DocumentTableRowNode[];
};

/** One row of a `DocumentTableNode`. */
export type DocumentTableRowNode = {
  /** Discriminator identifying this node as a table row. */
  type: 'tr';
  /** The cells of the row, in source order. */
  columns: DocumentTableColumnNode[];
};

/** One cell of a table row: a data cell (`td`) or a header cell (`th`). */
export type DocumentTableColumnNode = {
  /** Tag name of the cell element the node was built from. */
  type: 'td' | 'th';
  /**
   * Numeric value of the cell's `colspan` attribute. Undefined when the
   * attribute is missing, not a number, or zero.
   */
  colspan?: number;
  /** Content of the cell. */
  children: IChildNode[];
};

/** A paragraph holding inline content (text runs and nested lists). */
export type DocumentParagraphNode = {
  /** Discriminator identifying this node as a paragraph. */
  type: 'paragraph';
  /** Content of the paragraph, in source order. */
  children: IChildNode[];
};

/** A heading built from an `<h1>`–`<h6>` element. */
export type DocumentHeadingNode = {
  /** Discriminator identifying this node as a heading. */
  type: 'heading';
  /** Heading level, read from the digit in the tag name (`h2` gives 2). */
  level: number;
  /** Content of the heading. */
  children: IChildNode[];
};

/**
 * Any node that may appear as a child in the document tree. Narrow on the
 * `type` field to tell the variants apart.
 */
export type IChildNode =
  | DocumentTableNode
  | DocumentParagraphNode
  | DocumentHeadingNode
  | IDocumentListNode
  | IDocumentTextNode;

/**
 * Runs raw text from the parsed markup through `unescape` so that HTML
 * character references are turned into the characters they stand for.
 *
 * @param content - Text as it appears in the markup.
 * @returns The result of `unescape(content)`.
 */
function processText(content: string): string {
  const decoded: string = unescape(content);
  return decoded;
}

/**
 * Converts a parsed `<table>` element into a `DocumentTableNode`.
 *
 * Rows are taken from `<tr>` elements that are direct children of the table
 * and from `<tr>` elements wrapped in a `<thead>`, `<tbody>` or `<tfoot>`
 * section, in document order. Without descending into those sections every
 * row of a table that spells them out would be dropped.
 *
 * Inside a row only `<td>` and `<th>` elements become columns; any other
 * child (text, other elements) is ignored.
 *
 * @param table - Element node of the table; its `children` are scanned.
 * @returns A table node with one row node per `<tr>` found.
 */
function processTable(table: any): DocumentTableNode {
  const rows: DocumentTableRowNode[] = [];

  // Builds the column list of a single <tr> element.
  const readColumns = (row: any): DocumentTableColumnNode[] => {
    const columns: DocumentTableColumnNode[] = [];
    if (!Array.isArray(row.children)) {
      return columns;
    }
    for (const cell of row.children) {
      if (cell.type !== ELEMENT_NODE) {
        continue;
      }
      if (cell.name !== 'td' && cell.name !== 'th') {
        continue;
      }
      const rawColspan = cell.attributes ? cell.attributes.colspan : undefined;
      const children = Array.isArray(cell.children)
        ? cell.children.map((v: any) => processNode(v))
        : [];
      columns.push({
        type: cell.name,
        // A missing, non-numeric or zero colspan is reported as undefined.
        colspan: +rawColspan || undefined,
        children,
      });
    }
    return columns;
  };

  // Collects rows from a table or from one of its section wrappers.
  const collectRows = (container: any): void => {
    if (!Array.isArray(container.children)) {
      return;
    }
    for (const child of container.children) {
      if (child.type !== ELEMENT_NODE) {
        continue;
      }
      if (child.name === 'tr') {
        rows.push({
          type: 'tr',
          columns: readColumns(child),
        });
      } else if (
        child.name === 'thead' ||
        child.name === 'tbody' ||
        child.name === 'tfoot'
      ) {
        collectRows(child);
      }
    }
  };

  collectRows(table);

  return {
    type: 'table',
    rows,
  };
}

/**
 * Flattens a parsed node into its plain text.
 *
 * - Text node: every run of line breaks becomes a single space, then the
 *   result is trimmed. A line break in markup separates words, so deleting
 *   it would glue the neighbouring words together.
 * - Element node: the text of each child is computed recursively and the
 *   non-empty pieces are joined with a single space. Empty pieces (from
 *   whitespace-only text or empty elements) are skipped so that they do not
 *   leave stray spaces in the result.
 * - Anything else: a warning is logged and an empty string is returned.
 *
 * @param node - Parsed node to read text from.
 * @returns The collected text; empty when the node holds no visible text.
 */
function getTextValue(node: any): string {
  if (node.type === TEXT_NODE) {
    return node.value.replace(/[\r\n]+/g, ' ').trim();
  }

  if (node.type === ELEMENT_NODE) {
    return node.children
      .map((c: any) => getTextValue(c))
      .filter((piece: string) => piece !== '')
      .join(' ');
  }

  console.warn('Unexpected node for getTextValue', node);
  return '';
}

/**
 * Builds a list node from a parsed `<ol>` / `<ul>` element.
 *
 * Every child that carries a `children` array becomes one list item; its
 * content is produced by `processParagraph`. Children without such an array
 * are skipped.
 *
 * @param node - Element node of the list.
 * @returns A list node; `isOrdered` is `true` only when the tag is `ol`.
 */
function processList(node: any): IDocumentListNode {
  const items: IDocumentListItemNode[] = node.children
    .filter((entry: any) => Array.isArray(entry.children))
    .map(
      (entry: any): IDocumentListItemNode => ({
        type: 'list-item',
        children: processParagraph(entry).children,
      }),
    );

  return {
    type: 'list',
    isOrdered: node.name === 'ol',
    items,
  };
}

/**
 * Converts the children of an element into paragraph content.
 *
 * Mapping of children:
 * - text node: a text run with its entities decoded
 * - `<strong>` / `<b>`: a bold text run
 * - `<em>` / `<i>`: an italic text run (`<i>` is handled like `<em>`, in the
 *   same way that `<b>` is handled like `<strong>`)
 * - `<ul>` / `<ol>`: a nested list
 * - any other element: a plain text run holding the element's text
 * - any other node kind: skipped with a warning
 *
 * @param node - Node whose `children` are converted. A node without a
 *   `children` array yields an empty paragraph.
 * @returns A paragraph node holding the converted children in source order.
 */
function processParagraph(node: any): DocumentParagraphNode {
  const children: DocumentParagraphNode['children'] = [];

  if (Array.isArray(node.children)) {
    for (const child of node.children) {
      if (child.type === TEXT_NODE) {
        children.push({
          type: 'text',
          content: processText(child.value),
        });
        continue;
      }

      if (child.type !== ELEMENT_NODE) {
        console.warn('Unexpected node in paragraph', child);
        continue;
      }

      switch (child.name) {
        case 'strong':
        case 'b': {
          children.push({
            type: 'text',
            content: processText(getTextValue(child)),
            isBold: true,
          });
          break;
        }
        case 'em':
        case 'i': {
          children.push({
            type: 'text',
            content: processText(getTextValue(child)),
            isItalic: true,
          });
          break;
        }
        case 'ul':
        case 'ol': {
          children.push(processList(child));
          break;
        }
        default: {
          // Unknown inline elements are flattened to their text.
          children.push({
            type: 'text',
            content: processText(getTextValue(child)),
          });
          break;
        }
      }
    }
  }

  return {
    type: 'paragraph',
    children,
  };
}

/**
 * Builds a heading node from a parsed heading element.
 *
 * The level is the numeric value of the second character of the tag name
 * (`h3` gives 3). The whole content of the element is flattened into one
 * plain text run.
 *
 * @param node - Element node of the heading.
 * @returns A heading node with a single text child.
 */
function processHeading(node: any): DocumentHeadingNode {
  const level = Number(node.name[1]);
  const children: DocumentHeadingNode['children'] = [
    {
      type: 'text',
      content: processText(getTextValue(node)),
    },
  ];
  return { type: 'heading', level, children };
}

/**
 * Converts one parsed node into a document tree node.
 *
 * - `table`, `p`, `ol` / `ul` and `h1`–`h6` elements are handed to their
 *   dedicated processors.
 * - Any other element is reported with a warning and flattened to a text
 *   node holding its text.
 * - A text node becomes a text node with its text decoded.
 * - Every other node kind becomes a text node with empty content.
 *
 * @param node - Parsed node to convert.
 * @returns The matching document tree node.
 */
function processNode(node: any): IChildNode {
  if (node.type === TEXT_NODE) {
    return {
      type: 'text',
      content: processText(getTextValue(node)),
    };
  }

  if (node.type !== ELEMENT_NODE) {
    return {
      type: 'text',
      content: '',
    };
  }

  const tag = node.name;

  if (tag === 'table') {
    return processTable(node);
  }
  if (tag === 'p') {
    return processParagraph(node);
  }
  if (tag === 'ol' || tag === 'ul') {
    return processList(node);
  }
  if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tag)) {
    return processHeading(node);
  }

  // No dedicated processor: report the tag and keep only its text.
  console.warn('unexpected html tag', tag);
  return {
    type: 'text',
    content: processText(getTextValue(node)),
  };
}

/**
 * Parses an HTML string and converts its top-level nodes into document tree
 * nodes.
 *
 * Each child of the parsed root goes through `processNode`. Text nodes whose
 * content is the empty string are left out of the result.
 *
 * @param content - HTML markup to convert.
 * @returns The converted top-level nodes in source order.
 */
export function generateDocumentContentTree(content: string): IChildNode[] {
  const isBlankText = (candidate: IChildNode): boolean =>
    candidate.type === 'text' && candidate.content === '';

  return parse(content)
    .children.map((child: any) => processNode(child))
    .filter((converted: IChildNode) => !isBlankText(converted));
}
