import mammoth from 'mammoth';
import * as pdfjsLib from 'pdfjs-dist/webpack';
import { read, utils } from 'xlsx';

/**
 * Converts a file to plain text by dispatching to an extractor chosen from
 * the file's MIME type or, failing that, its filename extension.
 *
 * Failures are reported in the returned string rather than thrown: an
 * unrecognised file yields `[Unsupported file type: <name>]` and an
 * extractor error yields `[Error processing file: <name>] - <message>`.
 *
 * @param {object} file - File/Blob or file-like object with `name` and
 *   `type`; the extractors decide how its bytes are actually read.
 * @returns {Promise<string>} Extracted text, or a bracketed placeholder.
 */
export const convertFileToText = async (file) => {
  // Resolve the display name up front so the catch block can always report it,
  // even when `file` itself is missing or has no usable name.
  const hasFile = file !== null && file !== undefined;
  const fileName = hasFile && typeof file.name === 'string' ? file.name : 'unknown';

  try {
    if (!hasFile) {
      throw new TypeError('No file provided');
    }

    // Drop MIME parameters ("text/csv; charset=utf-8") and normalise case.
    const rawType = typeof file.type === 'string' ? file.type : '';
    const fileType = rawType.split(';')[0].trim().toLowerCase();

    // Only text after the last dot counts as an extension; a dot-less name
    // such as "c" or "txt" has none and must not be mistaken for a type.
    const dotIndex = fileName.lastIndexOf('.');
    const fileExtension = dotIndex === -1 ? '' : fileName.slice(dotIndex + 1).toLowerCase();

    const matches = (mimeTypes, extensions) =>
      mimeTypes.includes(fileType) || extensions.includes(fileExtension);

    // The order of these checks is significant: the first match wins.

    // Text files
    if (matches(['text/plain'], ['txt'])) {
      return await readFileContent(file);
    }

    // PDF files
    if (matches(['application/pdf'], ['pdf'])) {
      return await extractTextFromPDF(file);
    }

    // Word documents
    // NOTE(review): legacy .doc is routed to the DOCX extractor; confirm the
    // underlying library can actually read the binary .doc format.
    if (matches(
      [
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/msword',
      ],
      ['docx', 'doc'],
    )) {
      return await extractTextFromDOCX(file);
    }

    // Excel files
    if (matches(
      [
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'application/vnd.ms-excel',
      ],
      ['xlsx', 'xls'],
    )) {
      return await extractTextFromExcel(file);
    }

    // CSV files
    if (matches(['text/csv'], ['csv'])) {
      return await extractTextFromCSV(file);
    }

    // XML files
    if (matches(['application/xml', 'text/xml'], ['xml'])) {
      return await extractTextFromXML(file);
    }

    // JSON files
    if (matches(['application/json'], ['json'])) {
      return await extractTextFromJSON(file);
    }

    // RTF files (returned as raw content; RTF control words are not stripped)
    if (matches(['application/rtf'], ['rtf'])) {
      return await readFileContent(file);
    }

    // Markdown files
    if (matches(['text/markdown'], ['md', 'markdown'])) {
      return await readFileContent(file);
    }

    // Code files (matched by extension only)
    if (matches([], ['js', 'ts', 'py', 'java', 'cpp', 'c', 'cs', 'html', 'css', 'sql'])) {
      return await readFileContent(file);
    }

    return `[Unsupported file type: ${fileName}]`;
  } catch (error) {
    console.error(`Error processing file ${fileName}:`, error);
    const message = error instanceof Error ? error.message : String(error);
    return `[Error processing file: ${fileName}] - ${message}`;
  }
};

// Module-private helpers (not exported) used by convertFileToText.
/**
 * Reads a file-like object as text.
 *
 * Sources are tried in this order: a `text()` method, a non-empty `content`
 * property, a `url` to fetch, and finally a Blob read through `Response`.
 * An empty-string `content` with no other source is treated as an empty
 * file rather than an unreadable one.
 *
 * @param {object} file - File/Blob, or an object exposing `text()`,
 *   `content`, or `url`.
 * @returns {Promise<string>} The file's content (`content` is returned as-is).
 * @throws {Error} If no source is available or the URL fetch is not OK.
 */
const readFileContent = async (file) => {
  if (file === null || file === undefined) {
    throw new Error('Unable to read file content');
  }

  if (typeof file.text === 'function') {
    return await file.text();
  }

  if (file.content) {
    return file.content;
  }

  if (file.url) {
    const response = await fetch(file.url);
    // Without this check an HTTP error page would be returned as file text.
    if (!response.ok) {
      throw new Error(`Failed to fetch file content: HTTP ${response.status}`);
    }
    return await response.text();
  }

  // Fallback for Blob implementations that lack a text() method.
  if (typeof Blob !== 'undefined' && file instanceof Blob) {
    return await new Response(file).text();
  }

  // Checked last so inputs that previously resolved via url/Blob still do.
  if (file.content === '') {
    return '';
  }

  throw new Error('Unable to read file content');
};

/**
 * Extracts text from a PDF, one line of output per page.
 *
 * Bytes are taken from, in order: an `arrayBuffer()` method, a `content`
 * property (passed to pdf.js unchanged), a `url` to fetch, or a Blob read
 * through `Response`. Each page's text items are joined with single spaces.
 * When the document has more pages than `maxPages`, a trailing note records
 * the truncation.
 *
 * @param {object} file - File/Blob, or an object exposing `arrayBuffer()`,
 *   `content`, or `url`.
 * @param {number} [maxPages=50] - Maximum number of pages to process.
 * @returns {Promise<string>} Extracted text.
 * @throws {Error} If the bytes cannot be obtained or the URL fetch is not OK;
 *   errors raised by pdf.js propagate unchanged.
 */
const extractTextFromPDF = async (file, maxPages = 50) => {
  if (file === null || file === undefined) {
    throw new Error('Unable to read PDF content');
  }

  let pdfData;
  if (typeof file.arrayBuffer === 'function') {
    pdfData = await file.arrayBuffer();
  } else if (file.content) {
    pdfData = file.content;
  } else if (file.url) {
    const response = await fetch(file.url);
    if (!response.ok) {
      throw new Error(`Failed to fetch PDF content: HTTP ${response.status}`);
    }
    pdfData = await response.arrayBuffer();
  } else if (typeof Blob !== 'undefined' && file instanceof Blob) {
    // Reached only when the Blob has no arrayBuffer() method, so read it
    // through Response instead of calling the missing method.
    pdfData = await new Response(file).arrayBuffer();
  } else {
    throw new Error('Unable to read PDF content');
  }

  const loadingTask = pdfjsLib.getDocument({ data: pdfData });
  try {
    const pdf = await loadingTask.promise;
    const pageCount = Math.min(pdf.numPages, maxPages);

    // Pages are read sequentially so only one page is in flight at a time.
    const pageTexts = [];
    for (let pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const content = await page.getTextContent();
      pageTexts.push(`${content.items.map((item) => item.str).join(' ')}\n`);
    }

    let text = pageTexts.join('');
    if (pdf.numPages > pageCount) {
      text += `\n[Note: Only the first ${pageCount} pages were processed due to file size.]`;
    }
    return text;
  } finally {
    // Release the document and its worker resources whether or not
    // extraction succeeded.
    if (typeof loadingTask.destroy === 'function') {
      await loadingTask.destroy();
    }
  }
};

/**
 * Extracts the raw text of a Word document via mammoth.
 *
 * Bytes are taken from, in order: an `arrayBuffer()` method, a `content`
 * property (passed to mammoth unchanged), a `url` to fetch, or a Blob read
 * through `Response`.
 *
 * @param {object} file - File/Blob, or an object exposing `arrayBuffer()`,
 *   `content`, or `url`.
 * @returns {Promise<string>} The `value` of mammoth's raw-text result.
 * @throws {Error} If the bytes cannot be obtained or the URL fetch is not OK;
 *   errors raised by mammoth propagate unchanged.
 */
const extractTextFromDOCX = async (file) => {
  if (file === null || file === undefined) {
    throw new Error('Unable to read DOCX content');
  }

  let docxData;
  if (typeof file.arrayBuffer === 'function') {
    docxData = await file.arrayBuffer();
  } else if (file.content) {
    docxData = file.content;
  } else if (file.url) {
    const response = await fetch(file.url);
    if (!response.ok) {
      throw new Error(`Failed to fetch DOCX content: HTTP ${response.status}`);
    }
    docxData = await response.arrayBuffer();
  } else if (typeof Blob !== 'undefined' && file instanceof Blob) {
    // Reached only when the Blob has no arrayBuffer() method, so read it
    // through Response instead of calling the missing method.
    docxData = await new Response(file).arrayBuffer();
  } else {
    throw new Error('Unable to read DOCX content');
  }

  const { value } = await mammoth.extractRawText({ arrayBuffer: docxData });
  return value;
};

/**
 * Extracts a spreadsheet as text: every sheet is emitted under a
 * `=== Sheet: <name> ===` heading, with one tab-separated line per
 * non-empty row.
 *
 * Bytes are taken from, in order: an `arrayBuffer()` method, a `content`
 * property, a `url` to fetch, or a Blob read through `Response` — the same
 * sources the PDF and DOCX extractors accept.
 *
 * @param {object} file - File/Blob, or an object exposing `arrayBuffer()`,
 *   `content`, or `url`. `content` is handed to the workbook reader with
 *   `type: 'array'`, so it is presumably an ArrayBuffer or typed array —
 *   verify against callers.
 * @returns {Promise<string>} Text rendering of all sheets.
 * @throws {Error} If the bytes cannot be obtained or the URL fetch is not OK;
 *   errors raised by the workbook reader propagate unchanged.
 */
const extractTextFromExcel = async (file) => {
  if (file === null || file === undefined) {
    throw new Error('Unable to read Excel content');
  }

  let data;
  if (typeof file.arrayBuffer === 'function') {
    data = await file.arrayBuffer();
  } else if (file.content) {
    data = file.content;
  } else if (file.url) {
    const response = await fetch(file.url);
    if (!response.ok) {
      throw new Error(`Failed to fetch Excel content: HTTP ${response.status}`);
    }
    data = await response.arrayBuffer();
  } else if (typeof Blob !== 'undefined' && file instanceof Blob) {
    // Reached only when the Blob has no arrayBuffer() method, so read it
    // through Response instead of calling the missing method.
    data = await new Response(file).arrayBuffer();
  } else {
    throw new Error('Unable to read Excel content');
  }

  const workbook = read(data, { type: 'array' });

  return workbook.SheetNames
    .map((sheetName) => {
      // header: 1 yields each row as an array of cell values.
      const rows = utils.sheet_to_json(workbook.Sheets[sheetName], { header: 1 });
      const body = rows
        .filter((row) => row.length > 0)
        .map((row) => `${row.join('\t')}\n`)
        .join('');
      return `\n=== Sheet: ${sheetName} ===\n${body}`;
    })
    .join('');
};

/**
 * Converts CSV text to tab-separated text, one output line per record.
 *
 * Parsing follows the usual CSV quoting rules: a field that starts with a
 * double quote runs to its closing quote and may contain commas, line
 * breaks, and doubled quotes (`""` → `"`); the surrounding quotes are
 * removed. Both `\n` and `\r\n` end a record. A quote appearing in the
 * middle of an unquoted field is kept literally.
 *
 * @param {object} file - Anything accepted by readFileContent.
 * @returns {Promise<string>} Fields joined by tabs, records joined by `\n`.
 * @throws {TypeError} If the file content is not a string.
 */
const extractTextFromCSV = async (file) => {
  const content = await readFileContent(file);
  if (typeof content !== 'string') {
    throw new TypeError('CSV content must be text');
  }

  const records = [];
  let record = [];
  let field = '';
  let inQuotes = false;
  let atFieldStart = true;

  const endField = () => {
    record.push(field);
    field = '';
    atFieldStart = true;
  };

  for (let i = 0; i < content.length; i++) {
    const ch = content[i];

    if (inQuotes) {
      if (ch !== '"') {
        field += ch;
      } else if (content[i + 1] === '"') {
        // Doubled quote inside a quoted field is an escaped literal quote.
        field += '"';
        i++;
      } else {
        inQuotes = false;
      }
      continue;
    }

    if (ch === '"' && atFieldStart) {
      inQuotes = true;
      atFieldStart = false;
    } else if (ch === ',') {
      endField();
    } else if (ch === '\n' || (ch === '\r' && content[i + 1] === '\n')) {
      if (ch === '\r') {
        i++; // consume the '\n' of a CRLF pair
      }
      endField();
      records.push(record);
      record = [];
    } else {
      field += ch;
      atFieldStart = false;
    }
  }

  // Always flush the final record: a trailing newline therefore yields a
  // trailing empty line, matching a plain split on '\n'.
  endField();
  records.push(record);

  return records.map((fields) => fields.join('\t')).join('\n');
};

/**
 * Parses XML with the browser's DOMParser and renders it as indented text:
 * one line per element tag (with its attributes) and one line per non-blank
 * text or CDATA node, indented two spaces per nesting level.
 *
 * Comments and processing instructions are not emitted. Attribute values
 * and text are written as parsed, without re-escaping.
 *
 * @param {object} file - Anything accepted by readFileContent.
 * @returns {Promise<string>} Indented rendering of the document element.
 * @throws {Error} `Invalid XML format` when the parser reports an error, or
 *   `Invalid XML format: <detail>` when parsing itself throws.
 */
const extractTextFromXML = async (file) => {
  const content = await readFileContent(file);

  const ELEMENT_NODE = 1;
  const TEXT_NODE = 3;
  const CDATA_SECTION_NODE = 4;

  let xmlDoc;
  try {
    // Use browser's built-in DOMParser
    xmlDoc = new DOMParser().parseFromString(content, 'text/xml');
  } catch (error) {
    throw new Error(`Invalid XML format: ${error.message}`);
  }

  // Malformed input is signalled by a <parsererror> element in the result
  // rather than an exception. Thrown outside the try above so the message
  // is not wrapped a second time.
  if (xmlDoc.querySelector('parsererror')) {
    throw new Error('Invalid XML format');
  }

  const serializeNode = (node, level = 0) => {
    const indent = '  '.repeat(level);

    if (node.nodeType === ELEMENT_NODE) {
      const attributes = Array.from(node.attributes)
        .map((attr) => ` ${attr.name}="${attr.value}"`)
        .join('');
      const openTag = `${indent}<${node.nodeName}${attributes}`;

      if (node.childNodes.length === 0) {
        return `${openTag}/>\n`;
      }

      const children = Array.from(node.childNodes)
        .map((child) => serializeNode(child, level + 1))
        .join('');
      return `${openTag}>\n${children}${indent}</${node.nodeName}>\n`;
    }

    // CDATA carries character data just like a text node; dropping it would
    // silently lose content.
    if (node.nodeType === TEXT_NODE || node.nodeType === CDATA_SECTION_NODE) {
      const text = node.textContent.trim();
      return text ? `${indent}${text}\n` : '';
    }

    // Comments, processing instructions, etc. are intentionally skipped.
    return '';
  };

  return serializeNode(xmlDoc.documentElement);
};

/**
 * Parses JSON text and returns it pretty-printed with two-space indentation.
 *
 * A leading byte-order mark is removed before parsing, since JSON.parse
 * rejects it.
 *
 * @param {object} file - Anything accepted by readFileContent.
 * @returns {Promise<string>} Re-serialised, indented JSON.
 * @throws {Error} `Invalid JSON format: <detail>` when parsing fails; the
 *   original parse error is attached as `cause`.
 */
const extractTextFromJSON = async (file) => {
  const content = await readFileContent(file);
  const jsonText = typeof content === 'string' ? content.replace(/^\uFEFF/, '') : content;

  let parsed;
  try {
    parsed = JSON.parse(jsonText);
  } catch (error) {
    // Keep the parser's diagnostic instead of discarding it.
    throw new Error(`Invalid JSON format: ${error.message}`, { cause: error });
  }

  return JSON.stringify(parsed, null, 2); // Pretty print JSON
};