// SEO Crawler Engine — runs entirely in the extension (no server)

/**
 * Same-origin SEO crawler.
 *
 * Starting from `startUrl`, pages are fetched by sending a `FETCH_PAGE`
 * message through `chrome.runtime.sendMessage`, parsed with `DOMParser`,
 * checked against a fixed list of on-page SEO rules, and their same-origin
 * links are queued until `maxPages` URLs have been visited.
 */
class SEOCrawler {
  /**
   * @param {string} startUrl - Absolute URL to start from; its origin bounds the crawl.
   * @param {number} [maxPages=100] - Maximum number of URLs to visit.
   * @param {(progress: object) => void} [onProgress] - Called with a snapshot
   *   after every crawled page and once more when the crawl finishes.
   */
  constructor(startUrl, maxPages = 100, onProgress) {
    this.startUrl = new URL(startUrl);
    this.baseOrigin = this.startUrl.origin;
    this.maxPages = maxPages;
    this.onProgress = onProgress;
    this.visited = new Set();
    this.queued = new Set();
    this.queue = [startUrl];
    this.queued.add(this.normalize(startUrl));
    this.pages = [];
    this.issues = [];
    this.running = false;
    this.concurrency = 3;
    // Number of crawlPage() calls currently awaiting a result. Idle workers
    // use it to tell "queue drained for good" from "links may still arrive".
    this.inFlight = 0;
  }

  /**
   * Canonical form of a URL used for de-duplication: resolved against the
   * base origin, fragment removed, one trailing slash stripped.
   * @param {string} url
   * @returns {string} Normalized URL, or the input unchanged if it cannot be parsed.
   */
  normalize(url) {
    try {
      const u = new URL(url, this.baseOrigin);
      u.hash = '';
      return u.href.replace(/\/$/, '');
    } catch { return url; }
  }

  /** Ask the workers to stop; each one exits at its next loop check. */
  stop() { this.running = false; }

  /**
   * Run the crawl with `this.concurrency` workers and resolve once all of
   * them have finished. A final progress report is emitted at the end.
   * @returns {Promise<void>}
   */
  async start() {
    this.running = true;
    const workers = [];
    for (let i = 0; i < this.concurrency; i++) {
      workers.push(this.worker());
    }
    await Promise.all(workers);
    this.running = false;
    this.report();
  }

  /**
   * Worker loop: take URLs off the queue and crawl them until the crawl is
   * stopped, the page budget is used up, or no work can arrive any more.
   * @returns {Promise<void>}
   */
  async worker() {
    while (this.running && this.visited.size < this.maxPages) {
      if (this.queue.length === 0) {
        // An empty queue only means "done" when nobody is fetching: a page
        // that is still in flight may enqueue new links. Exiting here
        // unconditionally would leave a single worker doing all the work,
        // because the queue starts with just the seed URL.
        if (this.inFlight === 0) break;
        await new Promise((resolve) => setTimeout(resolve, 10));
        continue;
      }

      const url = this.queue.shift();
      if (!url) continue;
      const norm = this.normalize(url);
      if (this.visited.has(norm)) continue;
      this.visited.add(norm);

      this.inFlight++;
      try {
        const result = await this.crawlPage(url);
        if (result) {
          this.pages.push(result);
          this.report();
        }
      } catch (err) {
        const message = err?.message ?? String(err);
        this.pages.push(this.failedPage(url, message, message));
        this.report();
      } finally {
        this.inFlight--;
      }
    }
  }

  /**
   * Send a `FETCH_PAGE` message for `url` and resolve with the reply.
   * If the messaging layer reports a failure through
   * `chrome.runtime.lastError`, resolves with `{ error }` instead.
   * @param {string} url
   * @returns {Promise<object|undefined>}
   */
  fetchPage(url) {
    return new Promise((resolve) => {
      chrome.runtime.sendMessage({ type: 'FETCH_PAGE', url }, (response) => {
        // Reading lastError inside the callback is how the failure is
        // observed; the callback receives no response in that case.
        const lastError = chrome.runtime.lastError;
        if (lastError) {
          resolve({ error: lastError.message || 'Unknown error' });
          return;
        }
        resolve(response);
      });
    });
  }

  /**
   * Build the page record for a URL that could not be fetched or analyzed,
   * and add its single high-severity issue to the global issue list so the
   * failure is included in the counts produced by report().
   * @param {string} url
   * @param {string} error - Value stored in the record's `error` field.
   * @param {string} message - Text of the recorded 'Fetch Error' issue.
   * @returns {object} Page record with `status: 0` and `issueCount: 1`.
   */
  failedPage(url, error, message) {
    const issue = { type: 'Fetch Error', severity: 'high', message };
    this.issues.push({ ...issue, url });
    return { url, status: 0, error, issueCount: 1, issues: [issue] };
  }

  /**
   * Count whitespace-separated words in a piece of text.
   * @param {string|null|undefined} text
   * @returns {number} 0 for missing, empty or whitespace-only text.
   */
  static countWords(text) {
    const collapsed = (text ?? '').replace(/\s+/g, ' ').trim();
    // ''.split(' ') yields [''], so the empty case must be handled explicitly.
    return collapsed === '' ? 0 : collapsed.split(' ').length;
  }

  /**
   * Fetch one page, run the SEO checks on it, and queue its same-origin links.
   * @param {string} url - URL to crawl.
   * @returns {Promise<object>} Page record: either the extracted data with its
   *   issues, or a failure record when the fetch reply is missing or has `error`.
   */
  async crawlPage(url) {
    const res = await this.fetchPage(url);

    if (!res || res.error) {
      return this.failedPage(url, res?.error || 'Unknown error', res?.error || 'Failed to fetch');
    }

    const { status, url: finalUrl } = res;
    // A reply without string HTML is analyzed as an empty document so the
    // status-code checks below still run instead of a TypeError being thrown.
    const html = typeof res.html === 'string' ? res.html : '';
    // URL the results are attributed to: the one reported back by the fetch
    // when present, otherwise the one that was requested.
    const pageUrl = finalUrl || url;
    const issues = [];

    // Parse HTML
    const parser = new DOMParser();
    const doc = parser.parseFromString(html, 'text/html');

    // Extract data
    const title = doc.querySelector('title')?.textContent?.trim() || '';
    const metaDesc = doc.querySelector('meta[name="description"]')?.getAttribute('content')?.trim() || '';
    const h1s = [...doc.querySelectorAll('h1')].map(el => el.textContent?.trim());
    const canonical = doc.querySelector('link[rel="canonical"]')?.getAttribute('href') || '';
    const ogTitle = doc.querySelector('meta[property="og:title"]')?.getAttribute('content') || '';
    const ogDesc = doc.querySelector('meta[property="og:description"]')?.getAttribute('content') || '';
    const ogImage = doc.querySelector('meta[property="og:image"]')?.getAttribute('content') || '';
    const viewport = doc.querySelector('meta[name="viewport"]')?.getAttribute('content') || '';
    const images = [...doc.querySelectorAll('img')];
    const links = [...doc.querySelectorAll('a[href]')];
    const scripts = doc.querySelectorAll('script[src]');
    const stylesheets = doc.querySelectorAll('link[rel="stylesheet"]');
    const wordCount = SEOCrawler.countWords(doc.body?.textContent);
    const hasSchema = html.includes('application/ld+json');

    // === SEO Checks ===

    // Title
    if (!title) issues.push({ type: 'Missing Title', severity: 'high', message: 'Page has no title tag' });
    else if (title.length < 30) issues.push({ type: 'Title Too Short', severity: 'medium', message: `Title is ${title.length} chars (min 30): "${title.substring(0, 60)}"` });
    else if (title.length > 60) issues.push({ type: 'Title Too Long', severity: 'medium', message: `Title is ${title.length} chars (max 60): "${title.substring(0, 70)}..."` });

    // Meta description
    if (!metaDesc) issues.push({ type: 'Missing Meta Description', severity: 'high', message: 'No meta description found' });
    else if (metaDesc.length < 70) issues.push({ type: 'Meta Description Short', severity: 'medium', message: `${metaDesc.length} chars (min 70)` });
    else if (metaDesc.length > 160) issues.push({ type: 'Meta Description Long', severity: 'low', message: `${metaDesc.length} chars (max 160)` });

    // H1
    if (h1s.length === 0) issues.push({ type: 'Missing H1', severity: 'high', message: 'No H1 tag found' });
    else if (h1s.length > 1) issues.push({ type: 'Multiple H1s', severity: 'medium', message: `${h1s.length} H1 tags found` });

    // Images whose alt attribute is absent, empty or whitespace-only
    const noAlt = images.filter(img => !img.getAttribute('alt')?.trim());
    if (noAlt.length > 0) issues.push({ type: 'Images Missing Alt', severity: 'medium', message: `${noAlt.length} image(s) without alt text` });

    // Images that declare neither a width nor a height attribute
    const noSize = images.filter(img => !img.getAttribute('width') && !img.getAttribute('height'));
    if (noSize.length > 0) issues.push({ type: 'Images Missing Dimensions', severity: 'low', message: `${noSize.length} image(s) without width/height (causes CLS)` });

    // Canonical
    if (!canonical) issues.push({ type: 'Missing Canonical', severity: 'medium', message: 'No canonical URL specified' });

    // Open Graph
    if (!ogTitle) issues.push({ type: 'Missing OG Title', severity: 'low', message: 'No og:title meta tag' });
    if (!ogDesc) issues.push({ type: 'Missing OG Description', severity: 'low', message: 'No og:description meta tag' });
    if (!ogImage) issues.push({ type: 'Missing OG Image', severity: 'low', message: 'No og:image meta tag' });

    // Viewport
    if (!viewport) issues.push({ type: 'Missing Viewport', severity: 'high', message: 'No viewport meta tag (not mobile-friendly)' });

    // Schema
    if (!hasSchema) issues.push({ type: 'No Structured Data', severity: 'low', message: 'No JSON-LD schema markup found' });

    // Word count
    if (wordCount < 300) issues.push({ type: 'Thin Content', severity: 'medium', message: `Only ${wordCount} words (min 300 recommended)` });

    // Status code
    if (status >= 400) issues.push({ type: `HTTP ${status}`, severity: 'high', message: `Page returned ${status} status` });
    else if (status >= 300) issues.push({ type: `Redirect ${status}`, severity: 'low', message: `Page redirects (${status})` });

    // HTTPS — checked on the URL the issue is recorded against, so a request
    // that ended up on an https:// URL is not reported as served over HTTP.
    if (pageUrl.startsWith('http://')) issues.push({ type: 'Not HTTPS', severity: 'high', message: 'Page served over HTTP' });

    // Extract internal links and add to queue
    for (const a of links) {
      try {
        const href = a.getAttribute('href');
        if (!href || href.startsWith('#') || href.startsWith('mailto:') || href.startsWith('tel:') || href.startsWith('javascript:')) continue;
        const abs = new URL(href, pageUrl);
        if (abs.origin !== this.baseOrigin) continue;
        const norm = this.normalize(abs.href);
        if (!this.visited.has(norm) && !this.queued.has(norm) && (this.maxPages === Infinity || this.visited.size + this.queue.length < this.maxPages)) {
          this.queue.push(abs.href);
          this.queued.add(norm);
        }
      } catch { /* href could not be resolved to a URL; skip this link */ }
    }

    // Add issues to global list
    for (const issue of issues) {
      this.issues.push({ ...issue, url: pageUrl });
    }

    return {
      url: pageUrl,
      status,
      title,
      metaDesc,
      h1: h1s[0] || '',
      h1Count: h1s.length,
      canonical,
      ogTitle,
      ogDesc,
      ogImage,
      wordCount,
      images: images.length,
      imagesNoAlt: noAlt.length,
      scripts: scripts.length,
      stylesheets: stylesheets.length,
      hasSchema,
      issueCount: issues.length,
      issues,
    };
  }

  /**
   * Send a progress snapshot to `onProgress`, if one was supplied.
   * `warnings` counts medium and low severity issues, `errors` counts high
   * severity ones, and `ok` counts pages with no issues.
   */
  report() {
    const ok = this.pages.filter(p => p.issueCount === 0).length;
    const warnings = this.issues.filter(i => i.severity === 'medium' || i.severity === 'low').length;
    const errors = this.issues.filter(i => i.severity === 'high').length;
    this.onProgress?.({
      crawled: this.pages.length,
      queued: this.queue.length,
      maxPages: this.maxPages,
      ok,
      warnings,
      errors,
      pages: this.pages,
      issues: this.issues,
    });
  }

  /**
   * Render one CSV field. Missing values become an empty field; embedded
   * double quotes are doubled.
   * @param {*} value
   * @param {boolean} [forceQuote=false] - Quote even when not strictly needed.
   * @returns {string}
   */
  static csvField(value, forceQuote = false) {
    const text = String(value ?? '');
    if (!forceQuote && !/[",\r\n]/.test(text)) return text;
    return `"${text.replace(/"/g, '""')}"`;
  }

  /**
   * Export the crawled pages as CSV: one header row, then one row per page.
   * Title, meta description and H1 are always quoted; every other field is
   * quoted only when it contains a comma, double quote or line break.
   * @returns {string}
   */
  toCSV() {
    const headers = ['URL', 'Status', 'Title', 'Title Length', 'Meta Description', 'Meta Desc Length', 'H1', 'H1 Count', 'Canonical', 'Word Count', 'Images', 'Images No Alt', 'Has Schema', 'Issue Count'];
    const field = SEOCrawler.csvField;
    const rows = this.pages.map(p => [
      field(p.url), field(p.status), field(p.title || '', true), p.title?.length || 0,
      field(p.metaDesc || '', true), p.metaDesc?.length || 0,
      field(p.h1 || '', true), field(p.h1Count), field(p.canonical), field(p.wordCount),
      field(p.images), field(p.imagesNoAlt), field(p.hasSchema), field(p.issueCount),
    ]);
    return [headers.join(','), ...rows.map(r => r.join(','))].join('\n');
  }
}
