<?php
if (!defined('ABSPATH')) exit;

class InternalLinksTool_Cron {

  // Option keys for tracking background jobs
  const OPT_SCANNER_JOB = 'internallinkstool_scanner_bg_job';
  const OPT_KEYWORDS_JOB = 'internallinkstool_keywords_bg_job';

  /**
   * Hook this class into WordPress.
   *
   * Registers the two cron callbacks that process one batch each, plus the
   * four admin-post handlers that start and stop the background jobs.
   */
  public static function init() {
    // Hook name => static method of this class, in registration order.
    $callbacks = [
      // Cron hooks
      'internallinkstool_scanner_cron' => 'run_scanner_batch',
      'internallinkstool_keywords_cron' => 'run_keywords_batch',
      // Admin form handlers that start/stop the background jobs
      'admin_post_internallinkstool_start_scanner_bg' => 'handle_start_scanner_bg',
      'admin_post_internallinkstool_stop_scanner_bg' => 'handle_stop_scanner_bg',
      'admin_post_internallinkstool_start_keywords_bg' => 'handle_start_keywords_bg',
      'admin_post_internallinkstool_stop_keywords_bg' => 'handle_stop_keywords_bg',
    ];

    foreach ($callbacks as $hook_name => $method_name) {
      add_action($hook_name, [__CLASS__, $method_name]);
    }
  }

  /* ===========================
   * SCANNER BACKGROUND JOB
   * =========================== */

  /**
   * Admin-post handler: start a background scan job.
   *
   * Checks the `manage_options` capability and the
   * `internallinkstool_start_scanner_bg` nonce, stores a fresh job record,
   * runs the first batch synchronously and redirects back to the scanner page
   * with either a success (`msg`) or failure (`err`) query argument.
   *
   * Form fields read from $_POST:
   *  - _wpnonce        nonce for this action
   *  - bg_total        items to process, clamped to 50..5000 (default 500)
   *  - bg_batch_size   items per batch, clamped to 10..100 (default 25)
   *  - bg_force_rescan '1' to rescan every post instead of unmapped ones only
   *
   * Always terminates the request (wp_die() or redirect + exit).
   */
  public static function handle_start_scanner_bg() {
    if (!current_user_can('manage_options')) wp_die('No permission');

    // Unslash and sanitize first: an array-valued or slashed field becomes a
    // plain string (or '') before it reaches wp_verify_nonce().
    $nonce = isset($_POST['_wpnonce']) ? sanitize_text_field(wp_unslash($_POST['_wpnonce'])) : '';
    if (!$nonce || !wp_verify_nonce($nonce, 'internallinkstool_start_scanner_bg')) {
      wp_redirect(admin_url('admin.php?page=internallinkstool-scanner&err=' . rawurlencode('Nonce failed.')));
      exit;
    }

    $total_to_process = isset($_POST['bg_total']) ? (int)$_POST['bg_total'] : 500;
    $total_to_process = max(50, min(5000, $total_to_process));

    $batch_size = isset($_POST['bg_batch_size']) ? (int)$_POST['bg_batch_size'] : 25;
    $batch_size = max(10, min(100, $batch_size));

    $force_rescan = isset($_POST['bg_force_rescan']) && $_POST['bg_force_rescan'] === '1';

    // Initialize job status
    $job = [
      'status' => 'running',
      'total_target' => $total_to_process,
      'batch_size' => $batch_size,
      'processed' => 0,
      'force_rescan' => $force_rescan,
      'last_post_id' => 0,
      'started_at' => current_time('mysql'),
      'last_run' => null,
      'message' => 'Starting background scan...',
    ];
    update_option(self::OPT_SCANNER_JOB, $job, false);

    // Run first batch immediately (synchronously) to verify it works
    self::run_scanner_batch();

    // Re-read the record: the batch above updates status/processed/message.
    $job = get_option(self::OPT_SCANNER_JOB, []);
    if (!is_array($job)) $job = [];
    $status = $job['status'] ?? 'unknown';
    $processed = (int)($job['processed'] ?? 0);
    $message = $job['message'] ?? '';

    if ($status === 'running' && $processed > 0) {
      // run_scanner_batch() queues the follow-up event itself while the job
      // is still running; only schedule here when nothing is pending so no
      // duplicate event is requested.
      if (!wp_next_scheduled('internallinkstool_scanner_cron')) {
        wp_schedule_single_event(time() + 5, 'internallinkstool_scanner_cron');
      }
      wp_redirect(admin_url('admin.php?page=internallinkstool-scanner&msg=' . rawurlencode('Background scan started. First batch processed ' . $processed . ' items.')));
    } elseif ($status === 'completed') {
      wp_redirect(admin_url('admin.php?page=internallinkstool-scanner&msg=' . rawurlencode($message)));
    } else {
      // Debug: show what went wrong
      $debug = 'Status: ' . $status . ', Processed: ' . $processed . ', Force: ' . ($force_rescan ? 'yes' : 'no');
      wp_redirect(admin_url('admin.php?page=internallinkstool-scanner&err=' . rawurlencode('Background scan issue. ' . $debug)));
    }
    exit;
  }

  /**
   * Admin-post handler: stop the background scan job.
   *
   * Checks the `manage_options` capability and the
   * `internallinkstool_stop_scanner_bg` nonce, marks the stored job (if any)
   * as `stopped`, removes the pending cron events for the scanner hook and
   * redirects back to the scanner page.
   *
   * Always terminates the request (wp_die() or redirect + exit).
   */
  public static function handle_stop_scanner_bg() {
    if (!current_user_can('manage_options')) wp_die('No permission');

    // Unslash and sanitize first: an array-valued or slashed field becomes a
    // plain string (or '') before it reaches wp_verify_nonce().
    $nonce = isset($_POST['_wpnonce']) ? sanitize_text_field(wp_unslash($_POST['_wpnonce'])) : '';
    if (!$nonce || !wp_verify_nonce($nonce, 'internallinkstool_stop_scanner_bg')) {
      wp_redirect(admin_url('admin.php?page=internallinkstool-scanner&err=' . rawurlencode('Nonce failed.')));
      exit;
    }

    // Stop the job
    $job = get_option(self::OPT_SCANNER_JOB, []);
    if (!empty($job) && is_array($job)) {
      $job['status'] = 'stopped';
      $job['message'] = 'Stopped by user at ' . (int)($job['processed'] ?? 0) . ' items processed.';
      update_option(self::OPT_SCANNER_JOB, $job, false);
    }

    // Remove every pending event for the hook, not only the next one, so no
    // leftover batch can fire after the stop.
    wp_clear_scheduled_hook('internallinkstool_scanner_cron');

    wp_redirect(admin_url('admin.php?page=internallinkstool-scanner&msg=' . rawurlencode('Background scan stopped.')));
    exit;
  }

  /**
   * Process one batch of the background scan job.
   *
   * Called by the `internallinkstool_scanner_cron` event and directly by
   * handle_start_scanner_bg(). Does nothing unless the stored job has status
   * `running`. Each call:
   *  1. selects up to `batch_size` post IDs (never more than what is left of
   *     `total_target`) - either every matching post with ID greater than
   *     `last_post_id` (force rescan) or the IDs returned by
   *     InternalLinksTool_DB::get_unmapped_post_ids();
   *  2. passes each ID to InternalLinksTool_DB::upsert_document_from_post();
   *  3. saves progress and either marks the job `completed` (empty batch or
   *     target reached) or schedules the next batch 5 seconds later.
   *
   * When the required InternalLinksTool_DB methods are missing, the job is
   * marked `error` instead of being reported as completed with 0 items.
   *
   * NOTE(review): the job record read at the top is written back at the end;
   * a stop request saved while this batch is running would be overwritten.
   *
   * @return void
   */
  public static function run_scanner_batch() {
    $job = get_option(self::OPT_SCANNER_JOB, []);

    if (empty($job) || !is_array($job) || ($job['status'] ?? '') !== 'running') {
      return;
    }

    // max(1, ...) guards against a corrupted record producing LIMIT 0.
    $batch_size = max(1, (int)($job['batch_size'] ?? 25));
    $total_target = (int)($job['total_target'] ?? 500);
    $processed = (int)($job['processed'] ?? 0);
    $force_rescan = !empty($job['force_rescan']);
    $last_post_id = (int)($job['last_post_id'] ?? 0);

    if ($processed >= $total_target) {
      $job['status'] = 'completed';
      $job['message'] = 'Completed! Processed ' . $processed . ' items.';
      update_option(self::OPT_SCANNER_JOB, $job, false);
      return;
    }

    // Without the DB layer nothing can be selected or stored; report that
    // explicitly rather than finishing as "completed" with zero items.
    $db_ready = class_exists('InternalLinksTool_DB')
      && method_exists('InternalLinksTool_DB', 'upsert_document_from_post')
      && ($force_rescan || method_exists('InternalLinksTool_DB', 'get_unmapped_post_ids'));
    if (!$db_ready) {
      $job['status'] = 'error';
      $job['message'] = 'Error: InternalLinksTool_DB is not available.';
      $job['last_run'] = current_time('mysql');
      update_option(self::OPT_SCANNER_JOB, $job, false);
      return;
    }

    // Never fetch more than what is left of the target, so the job cannot
    // overshoot `total_target` on its last batch.
    $limit = min($batch_size, $total_target - $processed);

    // Get settings
    $settings = class_exists('InternalLinksTool_Admin') ? InternalLinksTool_Admin::get_settings() : [];
    $types = [];
    if (!empty($settings['include_posts'])) $types[] = 'post';
    if (!empty($settings['include_pages'])) $types[] = 'page';
    if (empty($types)) $types = ['post', 'page'];

    $statuses = ['publish'];
    if (!empty($settings['scan_statuses']) && is_array($settings['scan_statuses'])) {
      $statuses = array_values(array_intersect($settings['scan_statuses'], ['publish', 'draft']));
      if (empty($statuses)) $statuses = ['publish'];
    }

    // Get posts to process
    $ids = [];
    global $wpdb;

    if ($force_rescan) {
      // Force rescan: get ALL posts (paginated by last_post_id using direct SQL for efficiency)
      $type_placeholders = implode(',', array_fill(0, count($types), '%s'));
      $status_placeholders = implode(',', array_fill(0, count($statuses), '%s'));

      $sql = "SELECT ID FROM {$wpdb->posts}
              WHERE post_type IN ({$type_placeholders})
              AND post_status IN ({$status_placeholders})
              AND ID > %d
              ORDER BY ID ASC
              LIMIT %d";

      $params = array_merge($types, $statuses, [$last_post_id, $limit]);
      $ids = $wpdb->get_col($wpdb->prepare($sql, $params));
      if (!is_array($ids)) $ids = [];

      // Store debug info
      $job['debug_types'] = $types;
      $job['debug_statuses'] = $statuses;
      $job['debug_last_id'] = $last_post_id;
      $job['debug_found'] = count($ids);
    } else {
      // Normal: only get unmapped posts
      $ids = InternalLinksTool_DB::get_unmapped_post_ids($types, $statuses, $limit);
      // count() below requires an array on PHP 8.
      if (!is_array($ids)) $ids = [];
      $job['debug_found'] = count($ids);
      $job['debug_mode'] = 'unmapped_only';
    }

    $batch_processed = 0;
    $max_id = $last_post_id;
    foreach ($ids as $post_id) {
      $post_id = (int)$post_id;
      if ($post_id <= 0) continue;
      InternalLinksTool_DB::upsert_document_from_post($post_id);
      $batch_processed++;
      // Highest ID seen becomes the cursor for the next force-rescan page.
      if ($post_id > $max_id) $max_id = $post_id;
    }

    $processed += $batch_processed;
    $job['processed'] = $processed;
    $job['last_post_id'] = $max_id;
    $job['last_run'] = current_time('mysql');

    if ($batch_processed === 0 || $processed >= $total_target) {
      $job['status'] = 'completed';
      $job['message'] = 'Completed! Processed ' . $processed . ' items total.';
    } else {
      $job['message'] = 'Running... Processed ' . $processed . ' of ' . $total_target . ' items.';
      // Schedule next batch in 5 seconds
      wp_schedule_single_event(time() + 5, 'internallinkstool_scanner_cron');
    }

    update_option(self::OPT_SCANNER_JOB, $job, false);
  }

  /**
   * Fetch the stored scanner job record.
   *
   * @return mixed Value saved under OPT_SCANNER_JOB, or an empty array when
   *               the option does not exist.
   */
  public static function get_scanner_job_status() {
    $stored_job = get_option(self::OPT_SCANNER_JOB, []);

    return $stored_job;
  }

  /* ===========================
   * KEYWORDS BACKGROUND JOB
   * =========================== */

  /**
   * Admin-post handler: start a background keyword-extraction job.
   *
   * Checks the `manage_options` capability and the
   * `internallinkstool_start_keywords_bg` nonce, stores a fresh job record,
   * schedules the first cron batch and redirects back to the keywords page.
   * If the cron event cannot be scheduled the job is marked `error` and the
   * redirect carries an `err` message instead of a success message.
   *
   * Form fields read from $_POST:
   *  - _wpnonce           nonce for this action
   *  - bg_total           items to process, clamped to 10..1000 (default 100)
   *  - bg_batch_size      items per batch, clamped to 1..20 (default 5)
   *  - bg_force_reextract '1' to re-extract keywords for all documents
   *
   * Always terminates the request (wp_die() or redirect + exit).
   */
  public static function handle_start_keywords_bg() {
    if (!current_user_can('manage_options')) wp_die('No permission');

    // Unslash and sanitize first: an array-valued or slashed field becomes a
    // plain string (or '') before it reaches wp_verify_nonce().
    $nonce = isset($_POST['_wpnonce']) ? sanitize_text_field(wp_unslash($_POST['_wpnonce'])) : '';
    if (!$nonce || !wp_verify_nonce($nonce, 'internallinkstool_start_keywords_bg')) {
      wp_redirect(admin_url('admin.php?page=internallinkstool-keywords&err=' . rawurlencode('Nonce failed.')));
      exit;
    }

    $total_to_process = isset($_POST['bg_total']) ? (int)$_POST['bg_total'] : 100;
    $total_to_process = max(10, min(1000, $total_to_process));

    $batch_size = isset($_POST['bg_batch_size']) ? (int)$_POST['bg_batch_size'] : 5;
    $batch_size = max(1, min(20, $batch_size));

    $force_reextract = isset($_POST['bg_force_reextract']) && $_POST['bg_force_reextract'] === '1';

    // Initialize job status
    $job = [
      'status' => 'running',
      'total_target' => $total_to_process,
      'batch_size' => $batch_size,
      'processed' => 0,
      'saved' => 0,
      'errors' => 0,
      'force_reextract' => $force_reextract,
      'last_doc_id' => 0,
      'started_at' => current_time('mysql'),
      'last_run' => null,
      'message' => 'Starting background keyword extraction...',
    ];
    update_option(self::OPT_KEYWORDS_JOB, $job, false);

    // Schedule immediate cron event (unless one is already pending)
    if (!wp_next_scheduled('internallinkstool_keywords_cron')) {
      $scheduled = wp_schedule_single_event(time(), 'internallinkstool_keywords_cron');

      // A failed schedule would otherwise leave the job "running" forever
      // while the user is told it has started.
      if ($scheduled === false) {
        $job['status'] = 'error';
        $job['message'] = 'Error: could not schedule the background keyword extraction.';
        update_option(self::OPT_KEYWORDS_JOB, $job, false);

        wp_redirect(admin_url('admin.php?page=internallinkstool-keywords&err=' . rawurlencode('Could not schedule background keyword extraction.')));
        exit;
      }
    }

    wp_redirect(admin_url('admin.php?page=internallinkstool-keywords&msg=' . rawurlencode('Background keyword extraction started. Processing ' . $total_to_process . ' items in batches of ' . $batch_size . '.')));
    exit;
  }

  /**
   * Admin-post handler: stop the background keyword-extraction job.
   *
   * Checks the `manage_options` capability and the
   * `internallinkstool_stop_keywords_bg` nonce, marks the stored job (if any)
   * as `stopped`, removes the pending cron events for the keywords hook and
   * redirects back to the keywords page.
   *
   * Always terminates the request (wp_die() or redirect + exit).
   */
  public static function handle_stop_keywords_bg() {
    if (!current_user_can('manage_options')) wp_die('No permission');

    // Unslash and sanitize first: an array-valued or slashed field becomes a
    // plain string (or '') before it reaches wp_verify_nonce().
    $nonce = isset($_POST['_wpnonce']) ? sanitize_text_field(wp_unslash($_POST['_wpnonce'])) : '';
    if (!$nonce || !wp_verify_nonce($nonce, 'internallinkstool_stop_keywords_bg')) {
      wp_redirect(admin_url('admin.php?page=internallinkstool-keywords&err=' . rawurlencode('Nonce failed.')));
      exit;
    }

    // Stop the job
    $job = get_option(self::OPT_KEYWORDS_JOB, []);
    if (!empty($job) && is_array($job)) {
      $job['status'] = 'stopped';
      $job['message'] = 'Stopped by user. Processed: ' . (int)($job['processed'] ?? 0) . ', Saved: ' . (int)($job['saved'] ?? 0) . '.';
      update_option(self::OPT_KEYWORDS_JOB, $job, false);
    }

    // Remove every pending event for the hook, not only the next one, so no
    // leftover batch can fire after the stop.
    wp_clear_scheduled_hook('internallinkstool_keywords_cron');

    wp_redirect(admin_url('admin.php?page=internallinkstool-keywords&msg=' . rawurlencode('Background keyword extraction stopped.')));
    exit;
  }

  /**
   * Process one batch of the background keyword-extraction job.
   *
   * Called by the `internallinkstool_keywords_cron` event. Does nothing
   * unless the stored job has status `running`. Each call runs one batch -
   * run_keywords_batch_force() when `force_reextract` is set, otherwise
   * InternalLinksTool_Keywords::run_batch() - then adds the batch's
   * `processed` / `saved` / `errors` counts to the job and either marks it
   * `completed` (empty batch or target reached) or schedules the next batch
   * 10 seconds later.
   *
   * The batch size is capped to what is left of `total_target` so the job
   * never processes more documents than requested. Any Throwable raised by
   * the batch marks the job `error`, so a fatal in a helper class cannot
   * leave the job stuck in `running` with no follow-up event.
   *
   * NOTE(review): the job record read at the top is written back at the end;
   * a stop request saved while this batch is running would be overwritten.
   *
   * @return void
   */
  public static function run_keywords_batch() {
    $job = get_option(self::OPT_KEYWORDS_JOB, []);

    if (empty($job) || !is_array($job) || ($job['status'] ?? '') !== 'running') {
      return;
    }

    // max(1, ...) guards against a corrupted record producing an empty batch.
    $batch_size = max(1, (int)($job['batch_size'] ?? 5));
    $total_target = (int)($job['total_target'] ?? 100);
    $processed = (int)($job['processed'] ?? 0);
    $saved = (int)($job['saved'] ?? 0);
    $errors = (int)($job['errors'] ?? 0);
    $force_reextract = !empty($job['force_reextract']);
    $last_doc_id = (int)($job['last_doc_id'] ?? 0);

    if ($processed >= $total_target) {
      $job['status'] = 'completed';
      $job['message'] = 'Completed! Processed: ' . $processed . ', Saved: ' . $saved . ', Errors: ' . $errors . '.';
      update_option(self::OPT_KEYWORDS_JOB, $job, false);
      return;
    }

    // Only ask for what is left of the target.
    $limit = min($batch_size, $total_target - $processed);

    // Run keyword extraction batch
    try {
      if ($force_reextract) {
        // Force re-extract: process ALL documents (paginated)
        $res = self::run_keywords_batch_force($limit, $last_doc_id);
      } elseif (class_exists('InternalLinksTool_Keywords') && method_exists('InternalLinksTool_Keywords', 'run_batch')) {
        // Normal: only process documents without keywords
        $res = InternalLinksTool_Keywords::run_batch($limit);
      } else {
        throw new Exception('Keywords class not available.');
      }

      if (!is_array($res)) $res = [];

      $batch_processed = (int)($res['processed'] ?? 0);
      $batch_saved = (int)($res['saved'] ?? 0);
      $batch_errors = (int)($res['errors'] ?? 0);
      // Keep the previous cursor when the batch does not report one.
      $new_last_doc_id = (int)($res['last_doc_id'] ?? $last_doc_id);

      $processed += $batch_processed;
      $saved += $batch_saved;
      $errors += $batch_errors;

      $job['processed'] = $processed;
      $job['saved'] = $saved;
      $job['errors'] = $errors;
      $job['last_doc_id'] = $new_last_doc_id;
      $job['last_run'] = current_time('mysql');

      if ($batch_processed === 0 || $processed >= $total_target) {
        $job['status'] = 'completed';
        $job['message'] = 'Completed! Processed: ' . $processed . ', Saved: ' . $saved . ', Errors: ' . $errors . '.';
      } else {
        $job['message'] = 'Running... Processed: ' . $processed . '/' . $total_target . ', Saved: ' . $saved . '.';
        // Schedule next batch in 10 seconds (give API time)
        wp_schedule_single_event(time() + 10, 'internallinkstool_keywords_cron');
      }

    } catch (Throwable $e) {
      // Throwable (not just Exception) so Error/TypeError is recorded too.
      $errors++;
      $job['errors'] = $errors;
      $job['message'] = 'Error: ' . $e->getMessage();
      $job['status'] = 'error';
      $job['last_run'] = current_time('mysql');
    }

    update_option(self::OPT_KEYWORDS_JOB, $job, false);
  }

  /**
   * Run keyword extraction on ALL documents (for force re-extract).
   *
   * Selects up to $batch_size rows from the documents table with id greater
   * than $last_doc_id (filtered by the configured post types/statuses and,
   * when `respect_robots` is set, by the indexable/robots flags), asks
   * InternalLinksTool_OpenAI for keywords for each row and inserts or updates
   * the matching row in the keywords table.
   *
   * A document counts as an error when the OpenAI call fails, returns no
   * primary keyword, or the database write fails; it counts as saved only
   * when the write succeeded.
   *
   * @param int $batch_size  Maximum number of documents to process.
   * @param int $last_doc_id Only documents with a greater id are selected.
   * @return array Keys: `processed`, `saved`, `errors` (counts for this batch)
   *               and `last_doc_id` (highest document id seen, or $last_doc_id
   *               when no rows were found).
   * @throws Exception When InternalLinksTool_DB is not loaded.
   */
  private static function run_keywords_batch_force($batch_size, $last_doc_id) {
    if (!class_exists('InternalLinksTool_DB')) {
      throw new Exception('DB class not loaded.');
    }
    global $wpdb;

    $docs = InternalLinksTool_DB::table('documents');
    $keywords_table = InternalLinksTool_DB::table('keywords');

    // Get settings
    $settings = class_exists('InternalLinksTool_Admin') ? InternalLinksTool_Admin::get_settings() : [];

    $types = [];
    if (!empty($settings['include_posts'])) $types[] = 'post';
    if (!empty($settings['include_pages'])) $types[] = 'page';
    if (empty($types)) $types = ['post', 'page'];

    $statuses = ['publish'];
    if (!empty($settings['scan_statuses']) && is_array($settings['scan_statuses'])) {
      $statuses = array_values(array_intersect($settings['scan_statuses'], ['publish', 'draft']));
      if (empty($statuses)) $statuses = ['publish'];
    }

    $type_placeholders = implode(',', array_fill(0, count($types), '%s'));
    $status_placeholders = implode(',', array_fill(0, count($statuses), '%s'));

    $robots_sql = '';
    if (!empty($settings['respect_robots'])) {
      $robots_sql = ' AND d.is_indexable = 1 AND d.is_robots_blocked = 0 ';
    }

    // Get ALL documents (paginated by last_doc_id)
    $sql = "SELECT d.id, d.post_id, d.url, d.meta_title, d.meta_desc, d.h1
            FROM {$docs} d
            WHERE d.type IN ({$type_placeholders})
              AND d.status IN ({$status_placeholders})
              AND d.id > %d
              {$robots_sql}
            ORDER BY d.id ASC
            LIMIT %d";

    $params = array_merge($types, $statuses, [$last_doc_id, $batch_size]);
    $rows = $wpdb->get_results($wpdb->prepare($sql, $params), ARRAY_A);

    if (!is_array($rows)) $rows = [];

    $processed = 0;
    $saved = 0;
    $errors = 0;
    $max_doc_id = $last_doc_id;

    foreach ($rows as $r) {
      $processed++;
      $doc_id = (int)($r['id'] ?? 0);
      // Advance the cursor even when this document fails, so a bad row is
      // not retried forever.
      if ($doc_id > $max_doc_id) $max_doc_id = $doc_id;

      $post_id = (int)($r['post_id'] ?? 0);
      $meta_title = trim((string)($r['meta_title'] ?? ''));
      $meta_desc = trim((string)($r['meta_desc'] ?? ''));
      $h1 = trim((string)($r['h1'] ?? ''));

      try {
        // Try to get better context from post if meta is empty
        if ($post_id > 0 && ($meta_title === '' || $meta_desc === '')) {
          $post = get_post($post_id);
          if ($post) {
            if ($meta_title === '') $meta_title = $post->post_title;
            if ($meta_desc === '' && !empty($post->post_content)) {
              $meta_desc = wp_trim_words(wp_strip_all_tags($post->post_content), 40, '...');
            }
          }
        }

        // Call OpenAI redo_keywords for variation
        if (class_exists('InternalLinksTool_OpenAI') && method_exists('InternalLinksTool_OpenAI', 'redo_keywords')) {
          $result = InternalLinksTool_OpenAI::redo_keywords($meta_title, $meta_desc, $h1);
        } elseif (class_exists('InternalLinksTool_OpenAI') && method_exists('InternalLinksTool_OpenAI', 'extract_keywords')) {
          $result = InternalLinksTool_OpenAI::extract_keywords($meta_title, $meta_desc, $h1);
        } else {
          throw new Exception('OpenAI class not available.');
        }

        // Anything but an array cannot be read with the offsets used below.
        if (!is_array($result)) {
          throw new Exception('Unexpected keyword response.');
        }

        if (!empty($result['error'])) {
          throw new Exception($result['error']);
        }

        $primary = trim((string)($result['primary_keyword'] ?? ''));
        $secondary_arr = $result['secondary_keywords'] ?? [];
        if (!is_array($secondary_arr)) $secondary_arr = [];
        // Drop nested arrays/objects/null: trim() would raise a TypeError on
        // them under PHP 8.
        $secondary_arr = array_map('strval', array_filter($secondary_arr, 'is_scalar'));
        $secondary_arr = array_values(array_unique(array_filter(array_map('trim', $secondary_arr))));
        $secondary_csv = implode(', ', array_slice($secondary_arr, 0, 8));

        if ($primary === '') {
          $errors++;
          continue;
        }

        // Save/update keywords
        $exists = $wpdb->get_var($wpdb->prepare(
          "SELECT id FROM {$keywords_table} WHERE document_id = %d LIMIT 1",
          $doc_id
        ));

        $data = [
          'primary_keyword' => $primary,
          'secondary_keywords' => $secondary_csv,
          'source' => 'ai-bg',
          'updated_at' => current_time('mysql'),
        ];

        if ($exists) {
          $written = $wpdb->update($keywords_table, $data, ['document_id' => $doc_id]);
        } else {
          $data['document_id'] = $doc_id;
          $written = $wpdb->insert($keywords_table, $data);
        }

        // $wpdb->insert()/update() return false on failure; do not report
        // such a document as saved.
        if ($written === false) {
          $errors++;
          continue;
        }

        $saved++;

      } catch (Exception $e) {
        $errors++;
      }
    }

    return [
      'processed' => $processed,
      'saved' => $saved,
      'errors' => $errors,
      'last_doc_id' => $max_doc_id,
    ];
  }

  /**
   * Fetch the stored keyword-extraction job record.
   *
   * @return mixed Value saved under OPT_KEYWORDS_JOB, or an empty array when
   *               the option does not exist.
   */
  public static function get_keywords_job_status() {
    $stored_job = get_option(self::OPT_KEYWORDS_JOB, []);

    return $stored_job;
  }

  /**
   * Reset the scanner job by deleting its stored record.
   *
   * Only the OPT_SCANNER_JOB option is removed; nothing else is touched.
   */
  public static function clear_scanner_job() {
    $option_key = self::OPT_SCANNER_JOB;
    delete_option($option_key);
  }

  /**
   * Reset the keyword-extraction job by deleting its stored record.
   *
   * Only the OPT_KEYWORDS_JOB option is removed; nothing else is touched.
   */
  public static function clear_keywords_job() {
    $option_key = self::OPT_KEYWORDS_JOB;
    delete_option($option_key);
  }
}
