<?php
if (!defined('ABSPATH')) exit;

/**
 * Admin "Scanner" screen: renders the page and handles its three admin-post
 * forms (run a scan batch, request a re-scan, clear the scanner tables).
 *
 * Relies on InternalLinksTool_Admin (settings) and InternalLinksTool_DB
 * (storage), both defined outside this file; every use is guarded with
 * class_exists()/method_exists().
 */
class InternalLinksTool_Scanner {

  // Option name of the flag that forces a rescan of already-mapped docs (UI button).
  // It is set to 1 by handle_reset_scan() and cleared again after one batch.
  private static $opt_force_rescan = 'internallinkstool_force_rescan';

  /**
   * Registers the admin-post handlers for the forms rendered by render_scan_page().
   */
  public static function init() {
    add_action('admin_post_internallinkstool_run_scan', [__CLASS__, 'handle_run_scan']);
    add_action('admin_post_internallinkstool_reset_scan', [__CLASS__, 'handle_reset_scan']);
    add_action('admin_post_internallinkstool_clear_scanner', [__CLASS__, 'handle_clear_scanner']);
  }

  /**
   * Redirects to $url and terminates the request.
   *
   * If output has already started, a redirect header can no longer be sent,
   * so a minimal page with a "Continue" link is printed instead.
   *
   * @param string $url Destination URL.
   */
  private static function safe_redirect($url) {
    // Avoid white screens on hosts that treat header warnings as fatal
    if (!headers_sent()) {
      wp_safe_redirect($url);
      exit;
    }

    // Fallback: render a clickable link if headers already sent
    echo '<div class="wrap"><h1>Redirect</h1>';
    echo '<p><a href="' . esc_url($url) . '">Continue</a></p></div>';
    exit;
  }

  /**
   * Redirects back to the Scanner admin page with optional query arguments
   * and terminates the request.
   *
   * @param array $args Map of query-arg name => scalar value; values are rawurlencode()d.
   */
  private static function redirect_to_scanner($args = []) {
    $path = 'admin.php?page=internallinkstool-scanner';
    foreach ($args as $name => $value) {
      $path .= '&' . $name . '=' . rawurlencode((string)$value);
    }
    self::safe_redirect(admin_url($path));
  }

  /**
   * Verifies the `_wpnonce` POST field for $action. On failure the request is
   * redirected to the Scanner page with $failure_message as the error and ends.
   *
   * @param string $action          Nonce action the form was rendered with.
   * @param string $failure_message Error text shown when verification fails.
   */
  private static function require_nonce($action, $failure_message) {
    // Unslash + sanitize first: a non-string (e.g. array) value collapses to ''
    // instead of being passed to wp_verify_nonce().
    $nonce = isset($_POST['_wpnonce']) ? sanitize_text_field(wp_unslash($_POST['_wpnonce'])) : '';
    if ($nonce === '' || !wp_verify_nonce($nonce, $action)) {
      self::redirect_to_scanner(['err' => $failure_message]);
    }
  }

  /**
   * Casts a raw batch-size value to int and clamps it to the 1..200 range
   * offered by the form.
   *
   * @param mixed $raw Raw request value (or the default).
   * @return int
   */
  private static function clamp_batch_size($raw) {
    return max(1, min(200, (int)$raw));
  }

  /**
   * Reduces a raw status list to the supported ones ('publish', 'draft'),
   * sanitized, de-duplicated and re-indexed.
   *
   * @param mixed $raw Candidate list of statuses.
   * @return array Possibly empty list of allowed statuses.
   */
  private static function normalize_statuses($raw) {
    if (!is_array($raw) || empty($raw)) return [];

    $clean = array_values(array_unique(array_map('sanitize_key', $raw)));
    return array_values(array_intersect($clean, ['publish', 'draft']));
  }

  /**
   * Returns the post statuses to scan.
   *
   * Order of precedence: the 'scan_statuses' entry of the centralized settings,
   * then the legacy 'internallinkstool_scan_statuses' option, then ['publish'].
   *
   * @return array Non-empty list containing 'publish' and/or 'draft'.
   */
  private static function get_allowed_statuses() {
    // Centralized settings (default: published only)
    if (class_exists('InternalLinksTool_Admin')) {
      $s = InternalLinksTool_Admin::get_settings();
      $statuses = self::normalize_statuses(isset($s['scan_statuses']) ? $s['scan_statuses'] : null);
      if (!empty($statuses)) return $statuses;
    }

    // Back-compat (older builds stored statuses separately)
    $legacy = self::normalize_statuses(get_option('internallinkstool_scan_statuses', null));
    if (!empty($legacy)) return $legacy;

    return ['publish'];
  }

  /**
   * Returns the post types to scan, derived from the 'include_posts' and
   * 'include_pages' settings. Falls back to both types when neither is enabled
   * or the settings class is unavailable.
   *
   * @return array Non-empty list containing 'post' and/or 'page'.
   */
  private static function get_eligible_types() {
    $settings = class_exists('InternalLinksTool_Admin') ? InternalLinksTool_Admin::get_settings() : [];

    $types = [];
    if (!empty($settings['include_posts'])) $types[] = 'post';
    if (!empty($settings['include_pages'])) $types[] = 'page';

    return empty($types) ? ['post', 'page'] : $types;
  }

  /**
   * Prints the Scanner admin page: intro text, a summary of the active scan
   * settings, result notices from the last action (read from the query
   * string), progress counters and the three action forms.
   *
   * Returns without output when the user lacks 'manage_options'.
   */
  public static function render_scan_page() {
    if (!current_user_can('manage_options')) return;

    $batch_size = self::clamp_batch_size(isset($_GET['batch_size']) ? $_GET['batch_size'] : 50);

    // WordPress adds slashes to request data, so unslash before sanitizing.
    $last_msg = isset($_GET['msg']) ? sanitize_text_field(wp_unslash($_GET['msg'])) : '';
    $last_err = isset($_GET['err']) ? sanitize_text_field(wp_unslash($_GET['err'])) : '';
    $processed = isset($_GET['processed']) ? (int)$_GET['processed'] : null;

    $statuses = self::get_allowed_statuses();
    $progress = self::get_progress_counts($statuses);

    $settings_url = esc_url(admin_url('admin.php?page=internallinkstool-settings'));
    $form_action  = esc_url(admin_url('admin-post.php'));

    echo '<div class="wrap">';
    echo '<h1>Scanner</h1>';

    echo '<div style="background:#fef8e7;border-left:4px solid #f0c33c;padding:12px 16px;margin:10px 0 16px;">';
    echo '<p style="margin:0;font-size:13px;"><strong>You don\'t need to run this manually.</strong> The <a href="' . esc_url(admin_url('admin.php?page=internallinkstool-linker')) . '">Linker</a> page\'s "Run All" handles scanning, keyword extraction, and anchor bank generation automatically. This page (along with Keywords and AI Anchor Banks) is available for SEO experts who want more granular control &mdash; to review what is scanned, inspect extracted data, or re-run individual steps.</p>';
    echo '</div>';

    echo '<p>The Scanner maps all your posts and pages into the plugin database, extracting meta titles, descriptions, and H1 tags. ';
    echo 'This data is used by the Keywords and Linker features. ';
    echo '<strong>Configure your <a href="' . $settings_url . '">Link Settings</a> first</strong> to control which post types and statuses are included.</p>';

    // Get current settings for summary
    $settings = class_exists('InternalLinksTool_Admin') ? InternalLinksTool_Admin::get_settings() : [];

    // Build post types list
    $types_list = [];
    if (!empty($settings['include_posts'])) $types_list[] = 'Posts';
    if (!empty($settings['include_pages'])) $types_list[] = 'Pages';
    if (empty($types_list)) $types_list = ['Posts', 'Pages'];

    // Build statuses list
    $statuses_list = [];
    if (in_array('publish', $statuses, true)) $statuses_list[] = 'Published';
    if (in_array('draft', $statuses, true)) $statuses_list[] = 'Draft';
    if (empty($statuses_list)) $statuses_list = ['Published'];

    // Respect robots
    $respect_robots = !empty($settings['respect_robots']) ? 'Yes (excluding noindex/nofollow)' : 'No';

    // Current scan settings summary
    echo '<div class="notice notice-warning" style="border-left-color:#2271b1;background:#f0f6fc;">';
    echo '<p><strong>Current Scan Settings:</strong></p>';
    echo '<ul style="margin:5px 0 5px 20px;">';
    echo '<li><strong>Post Types:</strong> ' . esc_html(implode(', ', $types_list)) . '</li>';
    echo '<li><strong>Statuses:</strong> ' . esc_html(implode(', ', $statuses_list)) . '</li>';
    echo '<li><strong>Respect Robots:</strong> ' . esc_html($respect_robots) . '</li>';
    echo '</ul>';
    echo '<p class="description" style="margin-top:5px;">Change these in <a href="' . $settings_url . '">Link Settings</a>.</p>';
    echo '</div>';

    // An error takes precedence over an informational message.
    if ($last_err) echo '<div class="notice notice-error"><p>' . esc_html($last_err) . '</p></div>';
    elseif ($last_msg) echo '<div class="notice notice-info"><p>' . esc_html($last_msg) . '</p></div>';

    if ($processed !== null && $processed > 0) {
      echo '<div class="notice notice-success"><p>';
      echo 'Batch complete! Scanned <strong>' . (int)$processed . '</strong> item(s).';
      echo '</p></div>';
    }

    echo '<p><strong>Progress (eligible):</strong> ';
    echo '<code>' . (int)$progress['mapped_eligible'] . '</code> mapped out of ';
    echo '<code>' . (int)$progress['total_eligible'] . '</code> eligible. ';
    echo 'Remaining: <code>' . (int)$progress['remaining'] . '</code>.';
    echo '</p>';

    // Run scan batch
    echo '<form method="post" action="' . $form_action . '">';
    echo '<input type="hidden" name="action" value="internallinkstool_run_scan" />';
    wp_nonce_field('internallinkstool_run_scan');

    echo '<table class="form-table"><tr>';
    echo '<th scope="row">Batch size</th>';
    echo '<td>';
    echo '<input type="number" name="batch_size" value="' . esc_attr($batch_size) . '" min="1" max="200" />';
    echo '<p class="description">How many posts/pages to process per run (avoid timeouts).</p>';
    echo '</td></tr></table>';

    submit_button('Run Scan Batch');
    echo '</form>';

    // Re-scan existing documents option
    echo '<form method="post" action="' . $form_action . '" style="margin-top:10px;">';
    echo '<input type="hidden" name="action" value="internallinkstool_reset_scan" />';
    wp_nonce_field('internallinkstool_reset_scan');
    submit_button('Re-scan Existing Documents', 'secondary');
    echo '<p class="description">Next batch will re-scan already-mapped documents to refresh their data.</p>';
    echo '</form>';

    echo '<hr><p><strong>Tip:</strong> Keep clicking "Run Scan Batch" until Remaining reaches 0.</p>';

    // Clear database section
    echo '<hr>';
    echo '<h2>Clear Scanner Database</h2>';
    echo '<p class="description">Delete all scanned document data and start fresh. This will also clear all keywords.</p>';
    echo '<form method="post" action="' . $form_action . '" onsubmit="return confirm(\'Are you sure? This will delete ALL scanned documents and keywords. This cannot be undone.\');">';
    echo '<input type="hidden" name="action" value="internallinkstool_clear_scanner" />';
    wp_nonce_field('internallinkstool_clear_scanner');
    submit_button('Clear All Scanned Data', 'delete');
    echo '</form>';
    echo '</div>';
  }

  /**
   * admin-post handler: turns on the force-rescan flag for the next batch,
   * then redirects back to the Scanner page.
   */
  public static function handle_reset_scan() {
    if (!current_user_can('manage_options')) wp_die('No permission');

    self::require_nonce('internallinkstool_reset_scan', 'Nonce failed (reset). Refresh and try again.');

    // Third argument false: do not autoload this option.
    update_option(self::$opt_force_rescan, 1, false);

    self::redirect_to_scanner(['msg' => 'Re-scan enabled for the next batch.']);
  }

  /**
   * admin-post handler: empties the documents and keywords tables and clears
   * the force-rescan flag, then redirects back with a success or error notice.
   */
  public static function handle_clear_scanner() {
    if (!current_user_can('manage_options')) wp_die('No permission');

    self::require_nonce('internallinkstool_clear_scanner', 'Nonce failed. Refresh and try again.');

    if (!class_exists('InternalLinksTool_DB')) {
      self::redirect_to_scanner(['err' => 'DB class not available.']);
    }

    global $wpdb;

    // Table names come from the plugin's own DB helper, not from user input.
    $docs_table = InternalLinksTool_DB::table('documents');
    $keywords_table = InternalLinksTool_DB::table('keywords');

    // Clear documents first, then keywords (since keywords reference documents).
    // $wpdb->query() returns false when the statement fails.
    $docs_cleared = $wpdb->query("TRUNCATE TABLE {$docs_table}");
    $keywords_cleared = $wpdb->query("TRUNCATE TABLE {$keywords_table}");

    // Reset the force rescan flag
    update_option(self::$opt_force_rescan, 0, false);

    // Do not claim success if either statement failed.
    if ($docs_cleared === false || $keywords_cleared === false) {
      self::redirect_to_scanner(['err' => 'Could not clear the scanner tables. Check database permissions and try again.']);
    }

    self::redirect_to_scanner(['msg' => 'Scanner database cleared. All documents and keywords have been removed.']);
  }

  /**
   * admin-post handler: scans one batch of posts/pages into the plugin's
   * documents table and redirects back with the number processed.
   *
   * Without the force-rescan flag, the IDs come from
   * InternalLinksTool_DB::get_unmapped_post_ids() when that method exists;
   * otherwise (or when forcing) the first $batch_size eligible posts by
   * ascending ID are used.
   */
  public static function handle_run_scan() {
    if (!current_user_can('manage_options')) wp_die('No permission');

    self::require_nonce('internallinkstool_run_scan', 'Nonce failed (run). Refresh and try again.');

    // Without the upsert method nothing can be stored; report it like
    // handle_clear_scanner() does instead of silently processing 0 items.
    if (!class_exists('InternalLinksTool_DB') || !method_exists('InternalLinksTool_DB', 'upsert_document_from_post')) {
      self::redirect_to_scanner(['err' => 'DB class not available.']);
    }

    $batch_size = self::clamp_batch_size(isset($_POST['batch_size']) ? $_POST['batch_size'] : 50);
    $statuses = self::get_allowed_statuses();
    $types = self::get_eligible_types();
    $force_rescan = (int)get_option(self::$opt_force_rescan, 0);

    // If not forcing rescan, only scan posts not yet mapped
    if (!$force_rescan && method_exists('InternalLinksTool_DB', 'get_unmapped_post_ids')) {
      $ids = InternalLinksTool_DB::get_unmapped_post_ids($types, $statuses, $batch_size);
    } else {
      // NOTE(review): this query always starts at the lowest IDs and the flag is
      // cleared after one run, so a forced re-scan only ever refreshes the first
      // $batch_size documents - confirm that is intended.
      $q = new WP_Query([
        'post_type'      => $types,
        'post_status'    => $statuses,
        'posts_per_page' => $batch_size,
        'fields'         => 'ids',
        'orderby'        => 'ID',
        'order'          => 'ASC',
        'no_found_rows'  => true, // total row count is never read here
      ]);
      $ids = $q->posts;
    }

    $processed = 0;
    foreach ((is_array($ids) ? $ids : []) as $post_id) {
      $post_id = (int)$post_id;
      if ($post_id <= 0) continue;
      InternalLinksTool_DB::upsert_document_from_post($post_id);
      $processed++;
    }

    // disable force rescan after one run
    if ($force_rescan) update_option(self::$opt_force_rescan, 0, false);

    // Carry batch_size back so the form keeps the value the user chose.
    $args = [
      'processed'  => $processed,
      'batch_size' => $batch_size,
    ];
    if ($processed === 0) {
      // The page only shows a success notice for processed > 0; give feedback here too.
      $args['msg'] = 'Nothing to scan. All eligible documents are already mapped, or none match the current settings.';
    }

    self::redirect_to_scanner($args);
  }

  /**
   * Computes the progress counters shown on the Scanner page.
   *
   * @param array $statuses Post statuses considered eligible (defaults to ['publish'] if not an array).
   * @return array {
   *   total_eligible:  posts of the eligible types/statuses reported by WP_Query,
   *   mapped_eligible: result of InternalLinksTool_DB::count_documents_by_types_and_statuses() (0 if unavailable),
   *   mapped_total:    result of InternalLinksTool_DB::count_documents_total() (0 if unavailable),
   *   remaining:       total_eligible - mapped_eligible, never below 0
   * }
   */
  private static function get_progress_counts($statuses) {
    $statuses = is_array($statuses) ? $statuses : ['publish'];
    $types = self::get_eligible_types();

    // Total eligible in wp_posts: fetch a single row and read found_posts for the count.
    $q = new WP_Query([
      'post_type'      => $types,
      'post_status'    => $statuses,
      'posts_per_page' => 1,
      'fields'         => 'ids',
    ]);
    $total_eligible = (int)$q->found_posts;

    $mapped_total = 0;
    $mapped_eligible = 0;

    if (class_exists('InternalLinksTool_DB')) {
      if (method_exists('InternalLinksTool_DB', 'count_documents_total')) {
        $mapped_total = (int)InternalLinksTool_DB::count_documents_total();
      }
      if (method_exists('InternalLinksTool_DB', 'count_documents_by_types_and_statuses')) {
        $mapped_eligible = (int)InternalLinksTool_DB::count_documents_by_types_and_statuses($types, $statuses);
      }
    }

    return [
      'total_eligible'  => $total_eligible,
      'mapped_eligible' => $mapped_eligible,
      'mapped_total'    => $mapped_total,
      // Clamp: mapped can exceed total if posts were removed after being scanned.
      'remaining'       => max(0, $total_eligible - $mapped_eligible),
    ];
  }
}
