<?php
// Stop immediately unless the ABSPATH constant is defined, so this file does nothing when requested directly.
if (!defined('ABSPATH')) exit;

/**
 * Handles automatic processing of newly published posts/pages.
 *
 * Pipeline: Scanner → Keywords → Anchor Bank → Linker (outgoing links from the
 * new post, then incoming links from other documents to the new post).
 *
 * Processing is deferred to a single WP-Cron event so publishing is not blocked.
 */
class InternalLinksTool_Automation {

  /** Cron hook that processes one post; scheduled with the post ID as its only argument. */
  private const CRON_HOOK = 'internallinkstool_auto_process_post';

  /** Option name holding the rolling debug log. */
  private const LOG_OPTION = 'internallinkstool_automation_log';

  /** Maximum number of log entries kept in the option. */
  private const LOG_LIMIT = 100;

  /** Anchor bank categories, highest priority first. */
  private const ANCHOR_TYPES = ['exact', 'partial', 'descriptive', 'contextual', 'generic'];

  /** save_post listener that is detached while this class rewrites post content. */
  private const LINK_SCANNER_CALLBACK = ['InternalLinksTool_ExistingLinks', 'on_post_save'];

  /**
   * Hook into post status transitions.
   */
  public static function init() {
    add_action('transition_post_status', [__CLASS__, 'on_post_status_change'], 20, 3);
  }

  /**
   * Schedule auto-processing when a post/page is published for the first time.
   *
   * @param string $new_status Status after the transition.
   * @param string $old_status Status before the transition.
   * @param object $post       Post object (reads ->ID, ->post_type, ->post_title).
   */
  public static function on_post_status_change($new_status, $old_status, $post) {
    // Only a transition INTO publish counts; publish → publish is an update.
    if ($new_status !== 'publish' || $old_status === 'publish') return;
    if (!is_object($post) || empty($post->ID)) return;

    $settings = self::load_settings();
    if (!self::is_auto_enabled($settings)) return;
    if (!self::is_post_type_enabled((string)$post->post_type, $settings)) return;

    $post_id = (int)$post->ID;

    // Short delay so the post (and its meta) is fully saved before processing starts.
    $scheduled = wp_schedule_single_event(time() + 5, self::CRON_HOOK, [$post_id]);
    if ($scheduled === false) {
      self::log("Failed to schedule auto-processing for post #{$post_id}");
      return;
    }

    // Nudge WP-Cron. NOTE(review): the event is not due yet at this point, so it is
    // expected to run on a later cron spawn rather than this one — confirm on target hosts.
    spawn_cron();

    self::log("Scheduled auto-processing for new post #{$post_id}: " . $post->post_title);
  }

  /**
   * Register the cron action handler.
   */
  public static function register_cron_action() {
    add_action(self::CRON_HOOK, [__CLASS__, 'process_single_post']);
  }

  /**
   * Process a single post through the full pipeline.
   *
   * Stops early when the post is missing, no longer published, automation is
   * disabled, or the scanner/keyword step fails. A failed anchor-bank step is
   * tolerated because the linker can fall back to keywords.
   *
   * @param int $post_id Post ID supplied by the cron event.
   */
  public static function process_single_post($post_id) {
    $post_id = (int)$post_id;
    if ($post_id <= 0) return;

    $post = get_post($post_id);
    if (!$post) return;

    // The post may have been unpublished between scheduling and execution.
    if ($post->post_status !== 'publish') return;

    // Settings may have changed since the event was scheduled.
    $settings = self::load_settings();
    if (!self::is_auto_enabled($settings)) return;

    self::log("Auto-processing post #{$post_id}: " . $post->post_title);

    // Step 1: Scanner - add/update the documents table row.
    $doc_id = self::run_scanner($post_id);
    if (!$doc_id) {
      self::log("Scanner failed for post #{$post_id}");
      return;
    }
    self::log("Scanner complete: doc_id = {$doc_id}");

    // Step 2: Keywords - extract keywords using AI.
    if (!self::run_keywords($doc_id)) {
      self::log("Keywords extraction failed for doc #{$doc_id}");
      return;
    }
    self::log("Keywords extraction complete");

    // Step 3: Strategy - generate anchor bank using AI (non-fatal on failure).
    if (self::run_anchor_bank($doc_id)) {
      self::log("Anchor bank generation complete");
    } else {
      self::log("Anchor bank generation failed for doc #{$doc_id}");
    }

    // Step 4: Linker - add internal links FROM this post.
    $links_added = self::run_linker($post_id, $settings);
    self::log("Linker complete: {$links_added} links added to post #{$post_id}");

    // Then add links from OTHER posts TO this new post.
    $incoming_links = self::run_linker_incoming($post_id, $doc_id, $settings);
    self::log("Incoming linker complete: {$incoming_links} links added pointing to post #{$post_id}");
  }

  /**
   * Step 1: upsert the documents row for a post.
   *
   * @param int $post_id
   * @return mixed Document ID on success, false otherwise.
   */
  private static function run_scanner($post_id) {
    if (!class_exists('InternalLinksTool_DB')) return false;

    $doc_id = InternalLinksTool_DB::upsert_document_from_post($post_id);
    return $doc_id ?: false;
  }

  /**
   * Step 2: extract keywords for a document via AI and store them.
   *
   * @param int $doc_id Documents-table ID.
   * @return bool True when a primary keyword was extracted and written.
   */
  private static function run_keywords($doc_id) {
    if (!class_exists('InternalLinksTool_Keywords')) return false;
    if (!class_exists('InternalLinksTool_OpenAI')) return false;
    if (!class_exists('InternalLinksTool_DB')) return false;

    global $wpdb;
    $docs = InternalLinksTool_DB::table('documents');
    $keywords = InternalLinksTool_DB::table('keywords');

    $doc = $wpdb->get_row($wpdb->prepare(
      "SELECT * FROM {$docs} WHERE id = %d LIMIT 1",
      $doc_id
    ), ARRAY_A);

    if (!$doc) return false;

    $meta_title = (string)($doc['meta_title'] ?? '');
    $meta_desc = (string)($doc['meta_desc'] ?? '');
    $h1 = (string)($doc['h1'] ?? '');
    $url = (string)($doc['url'] ?? '');
    $post_id = (int)($doc['post_id'] ?? 0);

    // Yoast focus keyword and the opening paragraph are passed as extra signals.
    $yoast_keyword = '';
    $content_paragraph = '';
    if ($post_id > 0) {
      $yoast_keyword = trim((string)get_post_meta($post_id, '_yoast_wpseo_focuskw', true));
      $source_post = get_post($post_id);
      if ($source_post && is_string($source_post->post_content) && trim($source_post->post_content) !== '') {
        // Keep line breaks while stripping tags: the paragraph split depends on them.
        $content_paragraph = self::first_paragraph(wp_strip_all_tags($source_post->post_content));
      }
    }

    $result = InternalLinksTool_OpenAI::extract_keywords($meta_title, $meta_desc, $h1, $url, $yoast_keyword, $content_paragraph);

    if (empty($result) || !is_array($result) || !empty($result['error'])) {
      return false;
    }

    $primary = trim((string)($result['primary_keyword'] ?? ''));
    if ($primary === '') return false;

    $secondary = '';
    if (isset($result['secondary_keywords']) && is_array($result['secondary_keywords'])) {
      $secondary_list = array_filter(
        array_map(
          static function ($kw) { return is_scalar($kw) ? trim((string)$kw) : ''; },
          $result['secondary_keywords']
        ),
        static function ($kw) { return $kw !== ''; }
      );
      $secondary = implode(', ', $secondary_list);
    }

    $exists = $wpdb->get_var($wpdb->prepare(
      "SELECT id FROM {$keywords} WHERE document_id = %d LIMIT 1",
      $doc_id
    ));

    $data = [
      'document_id' => $doc_id,
      'primary_keyword' => $primary,
      'secondary_keywords' => $secondary,
      'source' => 'ai',
      'confidence' => 0.9,
      'updated_at' => current_time('mysql'),
    ];

    $written = $exists
      ? $wpdb->update($keywords, $data, ['document_id' => $doc_id])
      : $wpdb->insert($keywords, $data);

    // update()/insert() return false on a database error (0 affected rows is fine).
    if ($written === false) {
      self::log("Keywords could not be saved for doc #{$doc_id}");
      return false;
    }

    return true;
  }

  /**
   * Step 3: generate and store the anchor bank for a document.
   *
   * The existing bank is only replaced once the AI response is known to contain
   * at least one usable anchor.
   *
   * @param int $doc_id Documents-table ID.
   * @return bool True when at least one anchor was saved.
   */
  private static function run_anchor_bank($doc_id) {
    if (!class_exists('InternalLinksTool_Strategy')) return false;
    if (!class_exists('InternalLinksTool_OpenAI')) return false;
    if (!method_exists('InternalLinksTool_OpenAI', 'generate_anchor_bank')) return false;
    if (!class_exists('InternalLinksTool_DB')) return false;

    global $wpdb;
    $docs = InternalLinksTool_DB::table('documents');
    $keywords = InternalLinksTool_DB::table('keywords');
    $banks = InternalLinksTool_DB::table('anchor_banks');

    $row = $wpdb->get_row($wpdb->prepare(
      "SELECT d.meta_title, d.meta_desc, k.primary_keyword, k.secondary_keywords
       FROM {$docs} d
       INNER JOIN {$keywords} k ON k.document_id = d.id
       WHERE d.id = %d LIMIT 1",
      $doc_id
    ), ARRAY_A);

    if (!$row || empty($row['primary_keyword'])) return false;

    $primary_kw = trim((string)$row['primary_keyword']);
    $secondary_kw = trim((string)($row['secondary_keywords'] ?? ''));
    $meta_title = trim((string)($row['meta_title'] ?? ''));
    $meta_desc = trim((string)($row['meta_desc'] ?? ''));

    $result = InternalLinksTool_OpenAI::generate_anchor_bank($primary_kw, $secondary_kw, $meta_title, $meta_desc);

    if (!is_array($result) || !empty($result['error'])) {
      return false;
    }

    // Collect anchors BEFORE touching the table so a bad response keeps the old bank.
    $new_rows = [];
    foreach (self::ANCHOR_TYPES as $type) {
      if (empty($result[$type]) || !is_array($result[$type])) continue;
      foreach ($result[$type] as $anchor) {
        if (!is_scalar($anchor)) continue;
        $anchor = trim((string)$anchor);
        if ($anchor === '') continue;
        $new_rows[] = ['type' => $type, 'text' => $anchor];
      }
    }
    if (empty($new_rows)) return false;

    // Ensure the table exists; try the installer when it does not.
    $table_exists = $wpdb->get_var($wpdb->prepare('SHOW TABLES LIKE %s', $wpdb->esc_like($banks)));
    if (!$table_exists && method_exists('InternalLinksTool_DB', 'install')) {
      InternalLinksTool_DB::install();
    }

    $wpdb->delete($banks, ['document_id' => $doc_id]);

    $saved = 0;
    $now = current_time('mysql');
    foreach ($new_rows as $new_row) {
      $ok = $wpdb->insert($banks, [
        'document_id' => $doc_id,
        'anchor_type' => $new_row['type'],
        'anchor_text' => $new_row['text'],
        'used_count' => 0,
        'created_at' => $now,
      ]);
      if ($ok !== false) $saved++;
    }

    return $saved > 0;
  }

  /**
   * Step 4: add links FROM the given post to other documents.
   *
   * At most one link is added per sentence and per target, up to
   * max_links_per_page. Anchors found only in the first skip_sentences
   * sentences are not used.
   *
   * @param int   $post_id  Source post ID.
   * @param array $settings Plugin settings.
   * @return int Number of links written to the post.
   */
  private static function run_linker($post_id, $settings) {
    if (!class_exists('InternalLinksTool_Linker')) return 0;

    $post = get_post($post_id);
    if (!$post) return 0;

    $targets = self::get_targets_for_linking($post_id, $settings);
    if (empty($targets)) return 0;

    // Keep only targets whose type the source type may link to.
    $allowed_target_types = self::get_allowed_target_types($settings, $post->post_type);
    $targets = array_values(array_filter(
      $targets,
      static function ($t) use ($allowed_target_types) {
        return in_array($t['post_type'], $allowed_target_types, true);
      }
    ));
    if (empty($targets)) return 0;

    $content = (string)$post->post_content;
    $max_links = (int)($settings['max_links_per_page'] ?? 3);
    $skip_sentences = max(0, (int)($settings['skip_sentences'] ?? 2));
    $use_title = !empty($settings['link_title_attr']);

    $sentences = self::split_into_sentences($content);
    // Text of the skipped intro; anchor occurrences inside it must not receive the link.
    $skipped_text = implode(' ', array_slice($sentences, 0, $skip_sentences));

    $used_targets = [];
    $links_to_save = [];

    foreach ($sentences as $idx => $sentence) {
      if (count($links_to_save) >= $max_links) break;
      if ($idx < $skip_sentences) continue;

      foreach ($targets as $t) {
        if (isset($used_targets[$t['post_id']])) continue;

        // Anchor bank (by type priority) when present, otherwise keyword fallback.
        $anchors = !empty($t['anchor_bank'])
          ? self::flatten_anchor_bank($t['anchor_bank'])
          : ($t['fallback_anchors'] ?? []);

        foreach ($anchors as $anchor) {
          $anchor = (string)$anchor;
          if (strlen($anchor) < 3) continue;

          $pattern = self::anchor_pattern($anchor);
          if (!preg_match($pattern, $sentence)) continue;

          // Do not reuse text that already serves as link text somewhere in the post.
          if (self::anchor_already_linked($content, $anchor)) continue;

          $skip_hits = ($skipped_text === '') ? 0 : (int)preg_match_all($pattern, $skipped_text);
          $title_attr = $use_title ? ' title="' . esc_attr($anchor) . '"' : '';

          $new_content = self::insert_link($content, $anchor, esc_url($t['url']), $title_attr, $skip_hits);
          if ($new_content === null) continue;

          $content = $new_content;
          $used_targets[$t['post_id']] = true;
          $links_to_save[] = [
            'target_post_id' => $t['post_id'],
            'target_url' => $t['url'],
            'anchor_text' => $anchor,
          ];

          self::log("Link added FROM #{$post_id} TO #{$t['post_id']} (anchor: {$anchor})");
          break 2; // one link per sentence: move on to the next sentence
        }
      }
    }

    if (empty($links_to_save)) return 0;

    if (!self::save_post_content($post_id, $content)) {
      self::log("Failed to save linked content for post #{$post_id}");
      return 0;
    }

    $source_url = (string)get_permalink($post_id);
    foreach ($links_to_save as $link_data) {
      self::record_link($post_id, $source_url, $link_data['target_post_id'], $link_data['target_url'], $link_data['anchor_text']);
    }

    return count($links_to_save);
  }

  /**
   * Add links TO the new post from other published documents.
   *
   * Each candidate source receives at most one link. Sources are skipped when
   * they already reached max_links_per_page, already link to the target, or
   * their post type may not link to the target's type.
   *
   * NOTE(review): the include_urls whitelist is applied to outgoing targets in
   * get_targets_for_linking() but not to the new post's URL here — confirm
   * whether that asymmetry is intended.
   *
   * @param int   $post_id  Target (new) post ID.
   * @param int   $doc_id   Target document ID.
   * @param array $settings Plugin settings.
   * @return int Number of source posts that received a link.
   */
  private static function run_linker_incoming($post_id, $doc_id, $settings) {
    global $wpdb;

    $docs = InternalLinksTool_DB::table('documents');
    $keywords = InternalLinksTool_DB::table('keywords');
    $banks = InternalLinksTool_DB::table('anchor_banks');
    $existing_links = InternalLinksTool_DB::table('existing_links');

    $new_post = $wpdb->get_row($wpdb->prepare(
      "SELECT d.url, k.primary_keyword, k.secondary_keywords
       FROM {$docs} d
       INNER JOIN {$keywords} k ON k.document_id = d.id
       WHERE d.id = %d LIMIT 1",
      $doc_id
    ), ARRAY_A);

    if (!$new_post || empty($new_post['primary_keyword'])) return 0;

    // An empty URL cannot be linked to and is not a valid stripos() needle.
    $target_url = (string)($new_post['url'] ?? '');
    if ($target_url === '') return 0;

    // Anchor priority: primary keyword(s), anchor bank (exact first), secondary keywords.
    $anchors = self::split_csv($new_post['primary_keyword']);

    $bank_rows = $wpdb->get_results($wpdb->prepare(
      "SELECT anchor_text, anchor_type FROM {$banks}
       WHERE document_id = %d
       ORDER BY FIELD(anchor_type, 'exact', 'partial', 'descriptive', 'contextual', 'generic'), used_count ASC
       LIMIT 20",
      $doc_id
    ), ARRAY_A);

    if (!empty($bank_rows)) {
      $anchors = array_merge($anchors, array_column($bank_rows, 'anchor_text'));
    }

    $anchors = array_merge($anchors, self::split_csv($new_post['secondary_keywords'] ?? ''));
    $anchors = array_unique(array_filter($anchors));

    // Candidate sources: most recently updated published documents.
    $max_source_posts = 50;
    $source_posts = (array)$wpdb->get_results($wpdb->prepare(
      "SELECT d.post_id, d.url as source_url FROM {$docs} d
       WHERE d.post_id != %d AND d.status = 'publish'
       ORDER BY d.updated_at DESC
       LIMIT %d",
      $post_id, $max_source_posts
    ), ARRAY_A);

    $total_inserted = 0;
    $max_links_per_source = (int)($settings['max_links_per_page'] ?? 3);
    $target_type = get_post_type($post_id);
    $href = esc_url($target_url);
    $use_title = !empty($settings['link_title_attr']);

    foreach ($source_posts as $sp) {
      $source_id = (int)$sp['post_id'];
      $source_url = $sp['source_url'] ?? '';
      $source_post = get_post($source_id);
      if (!$source_post) continue;

      $allowed_target_types = self::get_allowed_target_types($settings, $source_post->post_type);
      if (!in_array($target_type, $allowed_target_types, true)) continue;

      $existing_count = (int)$wpdb->get_var($wpdb->prepare(
        "SELECT COUNT(*) FROM {$existing_links} WHERE source_post_id = %d",
        $source_id
      ));

      if ($existing_count >= $max_links_per_source) {
        self::log("Skipping source #{$source_id}: already has {$existing_count}/{$max_links_per_source} links");
        continue;
      }

      $already_linked = (int)$wpdb->get_var($wpdb->prepare(
        "SELECT COUNT(*) FROM {$existing_links} WHERE source_post_id = %d AND target_post_id = %d",
        $source_id, $post_id
      ));
      if ($already_linked > 0) continue;

      $content = (string)$source_post->post_content;

      // The links table may be stale; also look for the URL in the content itself.
      if (stripos($content, $target_url) !== false) continue;

      // Insert a single link using the first anchor that can be placed.
      $new_content = null;
      $used_anchor = '';
      foreach ($anchors as $anchor) {
        $anchor = (string)$anchor;
        if (strlen($anchor) < 3) continue;
        if (self::anchor_already_linked($content, $anchor)) continue;

        $title_attr = $use_title ? ' title="' . esc_attr($anchor) . '"' : '';
        $new_content = self::insert_link($content, $anchor, $href, $title_attr);
        if ($new_content !== null) {
          $used_anchor = $anchor;
          break;
        }
      }

      if ($new_content === null) continue;

      if (!self::save_post_content($source_id, $new_content)) {
        self::log("Failed to save linked content for post #{$source_id}");
        continue;
      }

      self::record_link($source_id, $source_url, $post_id, $target_url, $used_anchor);
      $total_inserted++;

      self::log("Inserted link: source #{$source_id} -> target #{$post_id} (anchor: {$used_anchor})");
    }

    return $total_inserted;
  }

  /**
   * Resolve which target post types a source type is allowed to link to.
   *
   * @param array  $settings    Plugin settings.
   * @param string $source_type Post type of the linking document.
   * @return array List of allowed target post types (may be empty).
   */
  private static function get_allowed_target_types($settings, $source_type) {
    $types = [];
    if (!empty($settings['include_posts'])) $types[] = 'post';
    if (!empty($settings['include_pages'])) $types[] = 'page';
    if (empty($types)) $types = ['post', 'page'];

    $key = ($source_type === 'page') ? 'page_link_targets' : 'post_link_targets';
    $val = $settings[$key] ?? 'all';

    if ($val === 'none') return [];
    if ($val === 'all') return $types;
    return in_array($val, $types, true) ? [$val] : [];
  }

  /**
   * Check if a URL passes the include-URLs whitelist.
   *
   * The whitelist is a newline/comma separated list of substrings; an empty
   * whitelist allows every URL.
   *
   * @param string $url
   * @param array  $settings
   * @return bool
   */
  private static function is_included_url($url, $settings) {
    if (empty($settings['include_urls'])) return true;

    $raw = trim((string)$settings['include_urls']);
    if ($raw === '') return true;

    $parts = preg_split('/\r\n|\r|\n|,/', $raw);
    foreach ((array)$parts as $needle) {
      $needle = trim((string)$needle);
      if ($needle === '') continue;
      if (strpos((string)$url, $needle) !== false) return true;
    }
    return false;
  }

  /**
   * Build the pool of link targets (every keyworded, published document except the source).
   *
   * @param int   $exclude_post_id Source post ID to leave out.
   * @param array $settings        Plugin settings (include_urls whitelist).
   * @return array List of targets with keys doc_id, post_id, post_type, url,
   *               anchor_bank (type => anchors) and fallback_anchors.
   */
  private static function get_targets_for_linking($exclude_post_id, $settings) {
    global $wpdb;

    $docs = InternalLinksTool_DB::table('documents');
    $keywords = InternalLinksTool_DB::table('keywords');
    $banks = InternalLinksTool_DB::table('anchor_banks');

    $rows = (array)$wpdb->get_results($wpdb->prepare(
      "SELECT d.id as doc_id, d.post_id, d.url, d.type, k.primary_keyword, k.secondary_keywords
       FROM {$docs} d
       INNER JOIN {$keywords} k ON k.document_id = d.id
       WHERE d.post_id != %d AND d.status = 'publish'
       AND k.primary_keyword IS NOT NULL AND k.primary_keyword != ''
       ORDER BY RAND()
       LIMIT 100",
      $exclude_post_id
    ), ARRAY_A);

    // Apply the include-URL whitelist first so anchors are only loaded for kept rows.
    $rows = array_values(array_filter(
      $rows,
      static function ($r) use ($settings) {
        return self::is_included_url((string)$r['url'], $settings);
      }
    ));
    if (empty($rows)) return [];

    // Load every anchor bank in one query instead of one query per target.
    $doc_ids = array_map('intval', array_column($rows, 'doc_id'));
    $placeholders = implode(',', array_fill(0, count($doc_ids), '%d'));
    $bank_rows = (array)$wpdb->get_results($wpdb->prepare(
      "SELECT document_id, anchor_type, anchor_text FROM {$banks}
       WHERE document_id IN ({$placeholders})
       ORDER BY used_count ASC",
      $doc_ids
    ), ARRAY_A);

    $banks_by_doc = [];
    foreach ($bank_rows as $br) {
      $banks_by_doc[(int)$br['document_id']][$br['anchor_type']][] = $br['anchor_text'];
    }

    $targets = [];
    foreach ($rows as $r) {
      $doc_id = (int)$r['doc_id'];

      // Primary keyword may hold several comma-separated keywords, like the secondary list.
      $fallback = array_values(array_unique(array_merge(
        self::split_csv($r['primary_keyword']),
        self::split_csv($r['secondary_keywords'] ?? '')
      )));

      $targets[] = [
        'doc_id' => $doc_id,
        'post_id' => (int)$r['post_id'],
        'post_type' => (string)($r['type'] ?? 'post'),
        'url' => $r['url'],
        'anchor_bank' => $banks_by_doc[$doc_id] ?? [],
        'fallback_anchors' => $fallback,
      ];
    }

    return $targets;
  }

  /**
   * Flatten an anchor bank (type => anchors) into one de-duplicated list,
   * ordered by type priority. Unknown types are ignored.
   *
   * @param array $bank
   * @return array
   */
  private static function flatten_anchor_bank($bank) {
    $anchors = [];
    foreach (self::ANCHOR_TYPES as $type) {
      if (isset($bank[$type]) && is_array($bank[$type])) {
        $anchors = array_merge($anchors, $bank[$type]);
      }
    }
    return array_values(array_unique($anchors));
  }

  /**
   * Simple sentence splitter: strips tags, then splits after ., ! or ? followed by whitespace.
   *
   * @param string $text HTML or plain text.
   * @return array List of non-empty, trimmed sentences.
   */
  private static function split_into_sentences($text) {
    $plain = strip_tags((string)$text);
    $parts = preg_split('/(?<=[.!?])\s+/', $plain);
    if (!is_array($parts)) return [];

    return array_values(array_filter(
      array_map('trim', $parts),
      static function ($sentence) { return $sentence !== ''; }
    ));
  }

  /**
   * Append a message to the rolling debug log stored in an option.
   *
   * @param string $message
   */
  private static function log($message) {
    $logs = self::get_log();

    $logs[] = [
      'time' => current_time('mysql'),
      'message' => $message,
    ];

    if (count($logs) > self::LOG_LIMIT) {
      $logs = array_slice($logs, -self::LOG_LIMIT);
    }

    // Third argument false: do not autoload the log on every request.
    update_option(self::LOG_OPTION, $logs, false);
  }

  /**
   * Get the automation log (for debugging).
   *
   * @return array List of ['time' => ..., 'message' => ...] entries.
   */
  public static function get_log() {
    $logs = get_option(self::LOG_OPTION, []);
    return is_array($logs) ? $logs : [];
  }

  /**
   * Clear the automation log.
   */
  public static function clear_log() {
    delete_option(self::LOG_OPTION);
  }

  /**
   * Load plugin settings, or an empty array when the admin class is unavailable.
   *
   * @return array
   */
  private static function load_settings() {
    if (!class_exists('InternalLinksTool_Admin')) return [];
    $settings = InternalLinksTool_Admin::get_settings();
    return is_array($settings) ? $settings : [];
  }

  /**
   * Whether new posts should be processed automatically.
   *
   * @param array $settings
   * @return bool
   */
  private static function is_auto_enabled($settings) {
    return ($settings['auto_process_new'] ?? 'manual') === 'auto';
  }

  /**
   * Whether the post type is handled (post/page only) and enabled in settings.
   *
   * @param string $post_type
   * @param array  $settings
   * @return bool
   */
  private static function is_post_type_enabled($post_type, $settings) {
    if ($post_type === 'post') return !empty($settings['include_posts']);
    if ($post_type === 'page') return !empty($settings['include_pages']);
    return false;
  }

  /**
   * Split a comma-separated string into trimmed, non-empty items.
   *
   * @param mixed $value
   * @return array
   */
  private static function split_csv($value) {
    $items = array_map('trim', explode(',', (string)$value));
    return array_values(array_filter(
      $items,
      static function ($item) { return $item !== ''; }
    ));
  }

  /**
   * Return the first paragraph of plain text, whitespace-collapsed and truncated.
   *
   * Paragraphs end at a blank line, or at a period followed by two or more
   * whitespace characters. Line breaks must still be present in $text.
   *
   * @param string $text      Plain text (tags already removed).
   * @param int    $max_chars Maximum length before "..." is appended.
   * @return string
   */
  private static function first_paragraph($text, $max_chars = 600) {
    // Normalise line endings so "\r\n" is not mistaken for two whitespace characters.
    $text = trim(str_replace(["\r\n", "\r"], "\n", (string)$text));
    if ($text === '') return '';

    $parts = preg_split('/\n\s*\n|(?<=\.)\s{2,}/', $text, 2);
    $first = is_array($parts) ? (string)$parts[0] : $text;

    // /u returns null on invalid UTF-8; retry byte-wise in that case.
    $collapsed = preg_replace('/\s+/u', ' ', $first);
    if ($collapsed === null) $collapsed = preg_replace('/\s+/', ' ', $first);
    $first = trim((string)$collapsed);

    if (mb_strlen($first) > $max_chars) {
      $first = mb_substr($first, 0, $max_chars) . '...';
    }
    return $first;
  }

  /**
   * Case-insensitive, word-bounded, UTF-8 pattern for an anchor phrase.
   *
   * @param string $anchor
   * @return string
   */
  private static function anchor_pattern($anchor) {
    return '/\b' . preg_quote($anchor, '/') . '\b/iu';
  }

  /**
   * Whether the anchor text already appears as (part of) the text of a link.
   *
   * @param string $content HTML content.
   * @param string $anchor
   * @return bool
   */
  private static function anchor_already_linked($content, $anchor) {
    $pattern = '/<a\s[^>]*>[^<]*' . preg_quote($anchor, '/') . '[^<]*<\/a>/iu';
    return preg_match($pattern, $content) === 1;
  }

  /**
   * Wrap one occurrence of $anchor in a link, touching text nodes only.
   *
   * Tags, attributes and HTML comments are never modified, and text inside
   * <a>, <script>, <style> or <textarea> is left alone. The matched text keeps
   * its original letter case. Shortcode attributes are NOT recognised as markup.
   *
   * @param string $content          HTML content.
   * @param string $anchor           Phrase to link (matched case-insensitively on word boundaries).
   * @param string $url              Already-escaped href value.
   * @param string $title_attr       Already-escaped ' title="..."' fragment, or ''.
   * @param int    $skip_occurrences Number of leading eligible matches to pass over.
   * @return string|null New content, or null when no eligible occurrence exists.
   */
  private static function insert_link($content, $anchor, $url, $title_attr, $skip_occurrences = 0) {
    $content = (string)$content;
    $anchor = (string)$anchor;
    if ($content === '' || $anchor === '') return null;

    // Odd-indexed parts are tags/comments (captured delimiters); even-indexed parts are text.
    $parts = preg_split('/(<!--.*?-->|<[^>]*>)/s', $content, -1, PREG_SPLIT_DELIM_CAPTURE);
    if (!is_array($parts)) return null;

    $pattern = self::anchor_pattern($anchor);
    $to_skip = max(0, (int)$skip_occurrences);
    $protected_depth = 0;

    foreach ($parts as $i => $part) {
      if ($i % 2 === 1) {
        if (preg_match('/^<(\/?)(a|script|style|textarea)\b/i', $part, $tag)) {
          $protected_depth = ($tag[1] === '/')
            ? max(0, $protected_depth - 1)
            : $protected_depth + 1;
        }
        continue;
      }

      if ($protected_depth > 0 || $part === '') continue;

      $offset = 0;
      while (preg_match($pattern, $part, $hit, PREG_OFFSET_CAPTURE, $offset) === 1) {
        $matched = $hit[0][0];
        $pos = $hit[0][1];
        $end = $pos + strlen($matched);

        if ($to_skip > 0) {
          $to_skip--;
          $offset = $end;
          continue;
        }

        // Plain concatenation: nothing in $url/$title_attr can act as a regex back-reference.
        $parts[$i] = substr($part, 0, $pos)
          . '<a href="' . $url . '"' . $title_attr . '>' . $matched . '</a>'
          . substr($part, $end);

        return implode('', $parts);
      }
    }

    return null;
  }

  /**
   * Write new post content without triggering the existing-links rescan.
   *
   * NOTE(review): when this runs from WP-Cron there is presumably no logged-in
   * user, so content-save filters may alter markup — confirm.
   *
   * @param int    $post_id
   * @param string $content Unslashed post content.
   * @return bool True when the post was updated.
   */
  private static function save_post_content($post_id, $content) {
    // Only restore the listener if it was attached, otherwise an invalid callback is registered.
    $was_hooked = remove_action('save_post', self::LINK_SCANNER_CALLBACK, 20);

    // wp_update_post() expects slashed data; without wp_slash() backslashes in the content are lost.
    $result = wp_update_post([
      'ID' => $post_id,
      'post_content' => wp_slash($content),
    ], true);

    if ($was_hooked) {
      add_action('save_post', self::LINK_SCANNER_CALLBACK, 20, 2);
    }

    return !is_wp_error($result) && (int)$result > 0;
  }

  /**
   * Record an inserted link in the existing_links table.
   *
   * @param int    $source_post_id
   * @param string $source_url
   * @param int    $target_post_id
   * @param string $target_url
   * @param string $anchor_text
   */
  private static function record_link($source_post_id, $source_url, $target_post_id, $target_url, $anchor_text) {
    global $wpdb;

    $wpdb->insert(InternalLinksTool_DB::table('existing_links'), [
      'source_post_id' => $source_post_id,
      'source_url' => $source_url,
      'target_url' => $target_url,
      'target_post_id' => $target_post_id,
      'anchor_text' => $anchor_text,
      'link_position' => 1,
      'updated_at' => current_time('mysql'),
    ]);
  }
}

// Attach the cron handler at file load (not inside init()) so it is registered whenever
// this file is included. register_cron_action() registers the same callback.
add_action('internallinkstool_auto_process_post', ['InternalLinksTool_Automation', 'process_single_post']);
