<?php
// Refuse to run when the file is requested directly rather than loaded by WordPress.
if (!defined('ABSPATH')) {
    exit;
}

class InternalLinksTool_OpenAI {

    /**
     * Resolve the OpenAI API key configured for the plugin.
     *
     * The unified settings array (InternalLinksTool_Admin::get_settings()) is
     * consulted first; the legacy standalone option is only read when that
     * array has no non-empty 'openai_api_key' entry.
     *
     * @return string Trimmed API key, or '' when none is configured.
     */
    public static function api_key() {
        $configured = class_exists('InternalLinksTool_Admin')
            ? InternalLinksTool_Admin::get_settings()
            : [];

        if (!empty($configured['openai_api_key'])) {
            return trim((string) $configured['openai_api_key']);
        }

        // Legacy option, kept so installs that predate the unified settings keep working.
        $legacy = get_option('internallinkstool_openai_api_key', '');
        if (!is_string($legacy)) {
            return '';
        }

        return trim($legacy);
    }

    /**
     * Extract primary and secondary SEO keywords for a page through the
     * OpenAI chat completions API.
     *
     * Every return path (success or failure) yields the same shape:
     * [
     *   'primary_keyword'    => string,        first entry of primary_keywords, '' when none
     *   'primary_keywords'   => string[],      at most 5 phrases, each capped at 6 words
     *   'secondary_keywords' => string[],      at most 2 phrases, each capped at 5 words
     *   'confidence'         => float (0..1),  0.75 when the model omits it, 0.0 on failure
     *   'error'              => string|null    null on success
     * ]
     *
     * @param string $meta_title        Page meta title.
     * @param string $meta_description  Page meta description.
     * @param string $h1                Page H1 heading.
     * @param string $url               Page URL (its last path segment is offered to the model as a slug).
     * @param string $yoast_keyword     Yoast focus keyword; when non-empty the prompt asks the model to keep it.
     * @param string $content_paragraph Optional excerpt of the page content.
     * @return array See the shape above.
     */
    public static function extract_keywords($meta_title, $meta_description, $h1, $url = '', $yoast_keyword = '', $content_paragraph = '') {
        // Single failure shape so callers can read every key without isset() checks.
        $fail = static function ($message) {
            return [
                'primary_keyword' => '',
                'primary_keywords' => [],
                'secondary_keywords' => [],
                'confidence' => 0.0,
                'error' => $message,
            ];
        };

        $key = self::api_key();
        if ($key === '') {
            return $fail('Missing OpenAI API key');
        }

        $prompt = self::build_prompt($meta_title, $meta_description, $h1, $url, $yoast_keyword, $content_paragraph);

        $payload = [
            'model' => 'gpt-4o-mini',
            'temperature' => 0,
            'messages' => [
                [
                    'role' => 'system',
                    'content' => 'You are an SEO keyword classification engine. Return valid JSON only.'
                ],
                ['role' => 'user', 'content' => $prompt],
            ],
            'response_format' => ['type' => 'json_object'],
        ];

        $res = wp_remote_post('https://api.openai.com/v1/chat/completions', [
            'timeout' => 30,
            'headers' => [
                'Authorization' => 'Bearer ' . $key,
                'Content-Type'  => 'application/json',
            ],
            'body' => wp_json_encode($payload),
        ]);

        if (is_wp_error($res)) {
            return $fail($res->get_error_message());
        }

        $code = (int) wp_remote_retrieve_response_code($res);
        $body = (string) wp_remote_retrieve_body($res);

        if ($code < 200 || $code >= 300) {
            return $fail('OpenAI HTTP ' . $code . ': ' . substr($body, 0, 200));
        }

        $json = json_decode($body, true);
        $content = $json['choices'][0]['message']['content'] ?? '';

        if (!is_string($content) || trim($content) === '') {
            return $fail('Empty model response');
        }

        $out = json_decode($content, true);
        if (!is_array($out)) {
            return $fail('Model did not return JSON');
        }

        // Keep the first $limit usable phrases, each capped at $max_words words.
        // Non-scalar entries (nested arrays, null) are skipped instead of being
        // cast to the literal string "Array".
        $clean = static function (array $items, $limit, $max_words) {
            $kept = [];
            foreach ($items as $item) {
                if (count($kept) >= $limit) {
                    break;
                }
                if (!is_scalar($item)) {
                    continue;
                }
                $phrase = self::trim_to_word_count($item, $max_words);
                if (is_string($phrase) && $phrase !== '') {
                    $kept[] = $phrase;
                }
            }
            return $kept;
        };

        // Accept both the new format (primary_keywords array) and the old one (primary_keyword string).
        $raw_primaries = [];
        if (isset($out['primary_keywords']) && is_array($out['primary_keywords'])) {
            $raw_primaries = $out['primary_keywords'];
        } elseif (isset($out['primary_keyword'])) {
            $raw_primaries = [$out['primary_keyword']];
        }

        $raw_secondary = $out['secondary_keywords'] ?? [];
        if (!is_array($raw_secondary)) {
            $raw_secondary = [];
        }

        // Guardrails: up to 5 primaries of 1-6 words, up to 2 secondaries of 1-5 words.
        $primaries = $clean($raw_primaries, 5, 6);
        $secondary = $clean($raw_secondary, 2, 5);

        // Confidence defaults to 0.75 when absent and is clamped to 0..1.
        $conf = 0.75;
        if (isset($out['confidence']) && is_scalar($out['confidence'])) {
            $conf = (float) $out['confidence'];
        }
        $conf = max(0.0, min(1.0, $conf));

        return [
            'primary_keyword' => !empty($primaries) ? $primaries[0] : '', // backwards compatible (first primary)
            'primary_keywords' => $primaries,
            'secondary_keywords' => $secondary,
            'confidence' => $conf,
            'error' => null,
        ];
    }

    /**
     * Assemble the user prompt sent to the model for keyword extraction.
     *
     * All inputs are cast to string and trimmed. The last path segment of
     * $url (with '-' and '_' turned into spaces) is offered as "URL Slug".
     * When $yoast_keyword is non-empty, it is listed among the inputs and an
     * override section instructs the model to keep it as the primary keyword.
     *
     * @param string $meta_title        Page meta title.
     * @param string $meta_description  Page meta description.
     * @param string $h1                Page H1 heading.
     * @param string $url               Page URL; may be empty.
     * @param string $yoast_keyword     Yoast focus keyword; may be empty.
     * @param string $content_paragraph Page content excerpt; may be empty.
     * @return string The complete prompt text.
     */
    private static function build_prompt($meta_title, $meta_description, $h1, $url = '', $yoast_keyword = '', $content_paragraph = '') {
        $h1 = trim((string) $h1);
        $meta_title = trim((string) $meta_title);
        $meta_description = trim((string) $meta_description);
        $url = trim((string) $url);
        $yoast_keyword = trim((string) $yoast_keyword);
        $content_paragraph = trim((string) $content_paragraph);

        // Human-readable slug: last path segment with separators replaced by spaces.
        $slug = '';
        if ($url !== '') {
            $path = wp_parse_url($url, PHP_URL_PATH);
            if ($path) {
                $segments = explode('/', trim($path, '/'));
                $slug = trim(str_replace(['-', '_'], ' ', end($segments)));
            }
        }

        // Horizontal rule printed above and below every section title.
        $rule = "–––––––––––––––––\n";

        $task =
            "You are an SEO keyword classification engine for ANY website type (blogs, guides, ecommerce, SaaS pages, docs, service pages, news, reviews, etc.).\n\n" .
            "Your task: extract\n" .
            "1) FIVE Primary Keywords (the main keyword + 4 close variations)\n" .
            "2) TWO Secondary Keywords (supporting/contextual terms)\n\n" .
            "You must follow the rules strictly and return only valid JSON.\n\n";

        // Input listing; optional fields are emitted only when they carry a value.
        $inputs = $rule . "INPUT FIELDS\n" . $rule;
        $inputs .= "- H1: " . $h1 . "\n";
        $inputs .= "- Meta Title: " . $meta_title . "\n";
        $inputs .= "- Meta Description: " . $meta_description . "\n";
        if ($slug !== '') {
            $inputs .= "- URL Slug: " . $slug . "\n";
        }
        if ($yoast_keyword !== '') {
            $inputs .= "- Yoast Focus Keyword: " . $yoast_keyword . "\n";
        }
        if ($content_paragraph !== '') {
            $inputs .= "- page_content_excerpt: " . $content_paragraph . "\n";
        }
        $inputs .= "\n";

        $core_idea =
            $rule . "CORE IDEA\n" . $rule .
            "Primary Keywords = the MAIN topic/search intent of the page + variations that could serve as anchor text.\n" .
            "  - First primary: the exact main keyword (the \"page's name\" as a query)\n" .
            "  - Other primaries: close variations (reworded, with/without brand, shorter/longer forms)\n" .
            "Secondary Keywords = supporting topics/categories/audience/features that help contextualize the page.\n\n" .
            "Do NOT invent keywords. Only use what is clearly implied by the inputs.\n\n";

        // Yoast override: when set, force it as primary keyword.
        $yoast_override = '';
        if ($yoast_keyword !== '') {
            $yoast_override =
                "\nIMPORTANT OVERRIDE — YOAST FOCUS KEYWORD\n" .
                "The site owner has explicitly set the Yoast focus keyword: \"" . $yoast_keyword . "\"\n" .
                "Use it as the primary_keyword exactly (you may only adjust to Title Case).\n" .
                "Do NOT replace it with synonyms, rephrasings, or alternative terms.\n" .
                "Focus on finding the best secondary keyword that complements this primary.\n";
        }

        $primary_rules =
            $rule . "HOW TO IDENTIFY PRIMARY KEYWORD (UNIVERSAL)\n" . $rule . "\n" .
            "Primary keyword should be the best phrase that answers:\n" .
            "\"What would a user type into Google to reach THIS page?\"\n\n" .
            "Use this scoring logic:\n\n" .
            "A) MUST match the dominant phrase that appears in BOTH:\n" .
            "   - H1\n" .
            "   - Meta Title\n" .
            "   (If there is no exact overlap, choose the closest equivalent phrase that clearly represents the same topic.)\n\n" .
            "B) Prefer phrases that include a UNIQUE entity when present:\n" .
            "   - Brand/product/person/organization/location/tool name\n" .
            "   Example: \"Versa Networks\", \"Paris\", \"Salesforce\", \"Kubernetes\", \"Ahrefs\"\n\n" .
            "C) If no clear entity exists, choose the most specific descriptive topic phrase\n" .
            "   (not generic terms like \"guide\", \"blog\", \"services\", \"welcome\").\n\n" .
            "D) If the H1 includes a colon or dash, the PRIMARY keyword is usually the part BEFORE the colon/dash\n" .
            "   unless the second part is essential to make it specific.\n" .
            "   Example:\n" .
            "   - \"Email Marketing: A Beginner's Guide\" -> Primary: \"Email Marketing\"\n" .
            "   - \"Kubernetes Security Best Practices\" -> Primary: \"Kubernetes Security\"\n\n" .
            "E) Slug is a tie-breaker:\n" .
            "   - If multiple candidates exist, prefer the one most reflected in the slug tokens.\n\n";

        $secondary_rules =
            $rule . "HOW TO IDENTIFY SECONDARY KEYWORD (UNIVERSAL)\n" . $rule . "\n" .
            "Secondary keyword should be a meaningful phrase that:\n" .
            "- Supports or narrows the primary topic via:\n" .
            "  - category/solution area (e.g. \"SASE Solutions\", \"SD-WAN\", \"Identity Security\")\n" .
            "  - audience/use case (e.g. \"For Small Businesses\", \"For Enterprises\")\n" .
            "  - feature or subtopic (e.g. \"Pricing\", \"Integrations\", \"Best Practices\", \"Troubleshooting\")\n" .
            "  - geography (e.g. \"In Israel\", \"New York City\") when relevant\n\n" .
            "Extraction rules:\n" .
            "1) Prefer a phrase that appears in the Meta Description.\n" .
            "2) If the Meta Description has no good candidate, use the part AFTER the colon/dash in the H1 or Meta Title.\n" .
            "3) It may appear in Title/H1/Slug too, but it must be LESS dominant than the primary topic.\n" .
            "4) It must NOT be:\n" .
            "   - identical to the primary keyword\n" .
            "   - a trivial variation of the primary keyword\n" .
            "   - a single generic word unless it is a standard category term (e.g. \"SD-WAN\", \"SASE\", \"CRM\")\n" .
            "5) If the primary keyword contains a brand, product, or entity name,\n" .
            "   the secondary keyword SHOULD also include that brand/entity.\n" .
            "   Example: Primary \"Plufl Dog Bed\" -> Secondary should be \"Plufl Dog Bed For Humans\" or \"Plufl Pet Furniture\", NOT just \"Dog Bed\".\n" .
            "   The brand/entity is usually the first distinctive word(s) in the primary keyword.\n\n";

        $normalization =
            $rule . "NORMALIZATION RULES\n" . $rule .
            "- Return keywords in Title Case (keep acronyms uppercase: SD-WAN, SASE, CRM, API).\n" .
            "- Remove filler words when possible (e.g. \"comprehensive\", \"ultimate\", \"complete\") unless they are part of the meaning.\n" .
            "- Keep the phrase natural and short:\n" .
            "  - Primary: ideally 2-6 words (can be longer if needed)\n" .
            "  - Secondary: ideally 1-5 words (can be longer if needed)\n\n";

        $output_spec =
            $rule . "OUTPUT (VERY IMPORTANT)\n" . $rule .
            "Return ONLY valid JSON. No explanation. No markdown.\n\n" .
            "{\n" .
            "  \"primary_keywords\": [\"Main Keyword\", \"Variation 1\", \"Variation 2\", \"Variation 3\", \"Variation 4\"],\n" .
            "  \"secondary_keywords\": [\"Secondary 1\", \"Secondary 2\"],\n" .
            "  \"confidence\": 0.0\n" .
            "}\n\n" .
            "- primary_keywords: a JSON array with exactly FIVE strings (main keyword first, then 4 variations).\n" .
            "- secondary_keywords: a JSON array with exactly TWO strings (supporting keywords).\n" .
            "- confidence: a number between 0 and 1.\n";

        return $task
            . $inputs
            . $core_idea
            . $yoast_override
            . $primary_rules
            . $secondary_rules
            . $normalization
            . $output_spec;
    }

    /**
     * Collapse whitespace runs to single spaces and cap a phrase at a maximum
     * number of words.
     *
     * Always returns a string. The Unicode-aware split fails on malformed
     * UTF-8, in which case a byte-wise split is used instead of propagating
     * the PCRE failure value to the caller.
     *
     * @param mixed $text      Phrase to shorten; cast to string and trimmed.
     * @param int   $max_words Maximum number of whitespace-separated words to keep.
     * @return string The shortened phrase, or '' when $text is blank or $max_words <= 0.
     */
    private static function trim_to_word_count($text, $max_words) {
        $text = trim((string) $text);
        if ($text === '' || $max_words <= 0) return '';

        // PREG_SPLIT_NO_EMPTY keeps leading/trailing Unicode spaces (which
        // trim() does not strip) from being counted as empty words.
        $parts = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($parts === false) {
            // /u rejects malformed UTF-8; fall back to byte-wise whitespace matching.
            $parts = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
        }

        if (!is_array($parts) || empty($parts)) return '';

        return implode(' ', array_slice($parts, 0, $max_words));
    }

    /**
     * Re-extract keywords with slight variation (for the Redo button).
     *
     * Sends the SAME prompt as extract_keywords but with temperature 0.7.
     * Because that prompt asks the model for a "primary_keywords" array, the
     * response is parsed in that format first, with the older single
     * "primary_keyword" string accepted as a fallback.
     *
     * Every return path (success or failure) yields:
     * [
     *   'primary_keyword'    => string,        first entry of primary_keywords, '' when none
     *   'primary_keywords'   => string[],      at most 5 phrases, each capped at 6 words
     *   'secondary_keywords' => string[],      at most 2 phrases, each capped at 5 words
     *   'confidence'         => float (0..1),  0.75 when the model omits it, 0.0 on failure
     *   'error'              => string|null    null on success
     * ]
     *
     * @param string $meta_title        Page meta title.
     * @param string $meta_description  Page meta description.
     * @param string $h1                Page H1 heading.
     * @param string $url               Page URL.
     * @param string $yoast_keyword     Yoast focus keyword, if any.
     * @param string $content_paragraph Optional excerpt of the page content.
     * @return array See the shape above.
     */
    public static function redo_keywords($meta_title, $meta_description, $h1, $url = '', $yoast_keyword = '', $content_paragraph = '') {
        // Single failure shape so callers can read every key without isset() checks.
        $fail = static function ($message) {
            return [
                'primary_keyword' => '',
                'primary_keywords' => [],
                'secondary_keywords' => [],
                'confidence' => 0.0,
                'error' => $message,
            ];
        };

        $key = self::api_key();
        if ($key === '') {
            return $fail('Missing OpenAI API key');
        }

        // Use the exact same prompt as extract_keywords
        $prompt = self::build_prompt($meta_title, $meta_description, $h1, $url, $yoast_keyword, $content_paragraph);

        $payload = [
            'model' => 'gpt-4o-mini',
            'temperature' => 0.7,  // Higher temperature to get variation in primary keyword too
            'messages' => [
                [
                    'role' => 'system',
                    'content' => 'You are an SEO keyword classification engine. Return valid JSON only.'
                ],
                ['role' => 'user', 'content' => $prompt],
            ],
            'response_format' => ['type' => 'json_object'],
        ];

        $res = wp_remote_post('https://api.openai.com/v1/chat/completions', [
            'timeout' => 30,
            'headers' => [
                'Authorization' => 'Bearer ' . $key,
                'Content-Type'  => 'application/json',
            ],
            'body' => wp_json_encode($payload),
        ]);

        if (is_wp_error($res)) {
            return $fail($res->get_error_message());
        }

        $code = (int) wp_remote_retrieve_response_code($res);
        $body = (string) wp_remote_retrieve_body($res);

        if ($code < 200 || $code >= 300) {
            return $fail('OpenAI HTTP ' . $code . ': ' . substr($body, 0, 200));
        }

        $json = json_decode($body, true);
        $content = $json['choices'][0]['message']['content'] ?? '';

        if (!is_string($content) || trim($content) === '') {
            return $fail('Empty model response');
        }

        $out = json_decode($content, true);
        if (!is_array($out)) {
            return $fail('Model did not return JSON');
        }

        // Keep the first $limit usable phrases, each capped at $max_words words.
        // Non-scalar entries are skipped rather than cast to the string "Array".
        $clean = static function (array $items, $limit, $max_words) {
            $kept = [];
            foreach ($items as $item) {
                if (count($kept) >= $limit) {
                    break;
                }
                if (!is_scalar($item)) {
                    continue;
                }
                $phrase = self::trim_to_word_count($item, $max_words);
                if (is_string($phrase) && $phrase !== '') {
                    $kept[] = $phrase;
                }
            }
            return $kept;
        };

        // The prompt requests "primary_keywords" (array); reading only the
        // legacy "primary_keyword" key would leave the primary empty.
        $raw_primaries = [];
        if (isset($out['primary_keywords']) && is_array($out['primary_keywords'])) {
            $raw_primaries = $out['primary_keywords'];
        } elseif (isset($out['primary_keyword'])) {
            $raw_primaries = [$out['primary_keyword']];
        }

        $raw_secondary = $out['secondary_keywords'] ?? [];
        if (!is_array($raw_secondary)) {
            $raw_secondary = [];
        }

        // Same guardrails as extract_keywords: 5 primaries x 6 words, 2 secondaries x 5 words.
        $primaries = $clean($raw_primaries, 5, 6);
        $secondary = $clean($raw_secondary, 2, 5);

        $conf = 0.75;
        if (isset($out['confidence']) && is_scalar($out['confidence'])) {
            $conf = (float) $out['confidence'];
        }
        $conf = max(0.0, min(1.0, $conf));

        return [
            'primary_keyword' => !empty($primaries) ? $primaries[0] : '',
            'primary_keywords' => $primaries,
            'secondary_keywords' => $secondary,
            'confidence' => $conf,
            'error' => null,
        ];
    }

    /**
     * Rewrite anchor text to be more natural in context.
     *
     * The model's suggestion is only accepted when it is non-empty, at most
     * six words long, and found (case-insensitively) inside $sentence. An
     * accepted suggestion is returned exactly as it is spelled in the
     * sentence, so the result is always a literal substring of it. In every
     * other case the trimmed original anchor is returned with
     * 'changed' => false.
     *
     * @param string $original_anchor The matched anchor text
     * @param string $sentence The sentence containing the anchor
     * @param string $target_topic Brief description of the target page topic
     * @return array ['rewritten' => string, 'changed' => bool, 'reason' => string, 'error' => string|null]
     */
    public static function rewrite_anchor($original_anchor, $sentence, $target_topic = '') {
        $key = self::api_key();
        if ($key === '') {
            return ['rewritten' => $original_anchor, 'changed' => false, 'reason' => '', 'error' => 'Missing API key'];
        }

        $original = trim((string)$original_anchor);
        $sentence = trim((string)$sentence);

        if ($original === '' || $sentence === '') {
            return ['rewritten' => $original, 'changed' => false, 'reason' => '', 'error' => 'Empty input'];
        }

        $topic_hint = trim((string)$target_topic);
        $topic_str = $topic_hint !== '' ? "\nTarget page topic: {$topic_hint}" : '';

        $system = "You are an SEO copywriter specializing in natural anchor text for internal links. Return ONLY valid JSON. No prose.";

        $user = "Analyze this anchor text in context and suggest a more natural or extended version if appropriate.\n\n"
            . "Sentence: \"{$sentence}\"\n"
            . "Current anchor: \"{$original}\""
            . $topic_str
            . "\n\nRules:\n"
            . "- If the anchor already reads naturally, keep it unchanged\n"
            . "- If it can be improved, suggest a slightly expanded or reworded version\n"
            . "- The rewritten anchor MUST be a substring that exists in the sentence\n"
            . "- Keep it 1-6 words maximum\n"
            . "- Don't add generic words like 'click here' or 'learn more'\n"
            . "- Preserve the original meaning and intent\n"
            . "- Only suggest changes if they genuinely improve readability\n\n"
            . "Return ONLY this JSON:\n"
            . "{\"rewritten\": \"...\", \"reason\": \"brief explanation or 'no change needed'\"}\n";

        $payload = [
            'model' => 'gpt-4o-mini',
            'temperature' => 0.2,
            'messages' => [
                ['role' => 'system', 'content' => $system],
                ['role' => 'user', 'content' => $user],
            ],
            'response_format' => ['type' => 'json_object'],
        ];

        $res = wp_remote_post('https://api.openai.com/v1/chat/completions', [
            'timeout' => 15,
            'headers' => [
                'Authorization' => 'Bearer ' . $key,
                'Content-Type'  => 'application/json',
            ],
            'body' => wp_json_encode($payload),
        ]);

        if (is_wp_error($res)) {
            return ['rewritten' => $original, 'changed' => false, 'reason' => '', 'error' => $res->get_error_message()];
        }

        $code = (int) wp_remote_retrieve_response_code($res);
        $body = (string) wp_remote_retrieve_body($res);

        if ($code < 200 || $code >= 300) {
            return ['rewritten' => $original, 'changed' => false, 'reason' => '', 'error' => 'HTTP ' . $code];
        }

        $json = json_decode($body, true);
        $content = $json['choices'][0]['message']['content'] ?? '';

        if (!is_string($content) || trim($content) === '') {
            return ['rewritten' => $original, 'changed' => false, 'reason' => '', 'error' => 'Empty response'];
        }

        $out = json_decode($content, true);
        // A non-scalar "rewritten" (e.g. a nested array) is as unusable as a missing one.
        if (!is_array($out) || !isset($out['rewritten']) || !is_scalar($out['rewritten'])) {
            return ['rewritten' => $original, 'changed' => false, 'reason' => '', 'error' => 'Invalid JSON'];
        }

        $rewritten = trim((string)$out['rewritten']);
        $reason = (isset($out['reason']) && is_scalar($out['reason'])) ? trim((string)$out['reason']) : '';

        // Validate: not empty
        if ($rewritten === '') {
            return ['rewritten' => $original, 'changed' => false, 'reason' => $reason, 'error' => null];
        }

        // Validate: not too long. Report why the suggestion was rejected instead
        // of passing along the model's justification for a rewrite we discard.
        $words = preg_split('/\s+/u', $rewritten, -1, PREG_SPLIT_NO_EMPTY);
        if (is_array($words) && count($words) > 6) {
            return ['rewritten' => $original, 'changed' => false, 'reason' => 'suggested text too long', 'error' => null];
        }

        // Verify rewritten text exists in sentence (case-insensitive)
        $pos = stripos($sentence, $rewritten);
        if ($pos === false) {
            // Rewritten anchor doesn't exist in sentence, fall back to original
            return ['rewritten' => $original, 'changed' => false, 'reason' => 'suggested text not in sentence', 'error' => null];
        }

        // Use the sentence's own spelling/casing so the returned anchor is a
        // literal substring of the sentence (stripos matches byte-for-byte in
        // length, so the matched span has the same byte length as the needle).
        $rewritten = substr($sentence, $pos, strlen($rewritten));

        $changed = (strtolower($rewritten) !== strtolower($original));

        return [
            'rewritten' => $rewritten,
            'changed' => $changed,
            'reason' => $reason,
            'error' => null,
        ];
    }

    /**
     * Generate alternative anchor text suggestions for a target page.
     * Used by "Redo" button to get fresh AI suggestions.
     *
     * The suggestion and each alternative are capped at 5 words; at most 3
     * alternatives are returned. On any failure the trimmed current anchor is
     * returned as the suggestion together with an error message.
     *
     * @param string $current_anchor The current anchor text
     * @param string $sentence_context The sentence where the link will appear.
     *                                 Accepted for interface compatibility; it is
     *                                 not currently included in the prompt.
     * @param string $target_title Target page title
     * @param string $target_desc Target page description
     * @return array ['suggestion' => string, 'alternatives' => array, 'reason' => string, 'error' => string|null]
     */
    public static function suggest_anchor($current_anchor, $sentence_context, $target_title, $target_desc = '') {
        $key = self::api_key();
        if ($key === '') {
            return ['suggestion' => $current_anchor, 'alternatives' => [], 'reason' => '', 'error' => 'Missing API key'];
        }

        $current = trim((string)$current_anchor);
        $title = trim((string)$target_title);
        $desc = trim((string)$target_desc);

        if ($title === '' && $current === '') {
            return ['suggestion' => $current, 'alternatives' => [], 'reason' => '', 'error' => 'Missing target info'];
        }

        $system = "You are an SEO analyst focused on internal linking strategy. Return valid JSON only.";

        $user = "Suggest anchor text options for an internal link to a target page.\n\n"
            . "Target page information:\n"
            . "- Title: {$title}\n"
            . ($desc !== '' ? "- Description: {$desc}\n" : '')
            . ($current !== '' ? "\nCurrent anchor being used: \"{$current}\"\n" : '')
            . "\nRules:\n"
            . "- Primary keyword: the single best anchor text for linking to this page (1-5 words)\n"
            . "- Secondary keywords: 2-3 alternative anchor variations (each 1-5 words)\n"
            . "- Anchors should be natural, SEO-friendly terms that describe the target page\n"
            . "- Avoid generic anchors like 'click here', 'read more', 'learn more'\n"
            . "- Do not capitalize unless it's a proper noun\n"
            . "- Consider synonyms and related terms\n\n"
            . "Return ONLY this JSON:\n"
            . "{\"suggestion\": \"best anchor\", \"alternatives\": [\"alt1\", \"alt2\"], \"reason\": \"brief explanation\"}\n";

        $payload = [
            'model' => 'gpt-4o-mini',
            'temperature' => 0.7,  // Higher temperature to get variation in suggestions
            'messages' => [
                ['role' => 'system', 'content' => $system],
                ['role' => 'user', 'content' => $user],
            ],
            'response_format' => ['type' => 'json_object'],
        ];

        $res = wp_remote_post('https://api.openai.com/v1/chat/completions', [
            'timeout' => 20,
            'headers' => [
                'Authorization' => 'Bearer ' . $key,
                'Content-Type'  => 'application/json',
            ],
            'body' => wp_json_encode($payload),
        ]);

        if (is_wp_error($res)) {
            return ['suggestion' => $current, 'alternatives' => [], 'reason' => '', 'error' => $res->get_error_message()];
        }

        $code = (int) wp_remote_retrieve_response_code($res);
        $body = (string) wp_remote_retrieve_body($res);

        if ($code < 200 || $code >= 300) {
            return ['suggestion' => $current, 'alternatives' => [], 'reason' => '', 'error' => 'HTTP ' . $code];
        }

        $json = json_decode($body, true);
        $content = $json['choices'][0]['message']['content'] ?? '';

        if (!is_string($content) || trim($content) === '') {
            return ['suggestion' => $current, 'alternatives' => [], 'reason' => '', 'error' => 'Empty response'];
        }

        $out = json_decode($content, true);
        // A non-scalar "suggestion" (e.g. a nested array) is as unusable as a missing one.
        if (!is_array($out) || !isset($out['suggestion']) || !is_scalar($out['suggestion'])) {
            return ['suggestion' => $current, 'alternatives' => [], 'reason' => '', 'error' => 'Invalid JSON'];
        }

        $suggestion = trim((string)$out['suggestion']);
        $reason = (isset($out['reason']) && is_scalar($out['reason'])) ? trim((string)$out['reason']) : '';

        // Fall back to the current anchor when the model returned a blank suggestion
        if ($suggestion === '') {
            $suggestion = $current;
        }

        // Enforce word limit
        $suggestion = self::trim_to_word_count($suggestion, 5);

        // Clean up alternatives: keep the first 3 usable strings, each capped at 5 words.
        // Non-scalar entries are skipped rather than cast to the string "Array".
        $raw_alternatives = (isset($out['alternatives']) && is_array($out['alternatives'])) ? $out['alternatives'] : [];
        $alternatives = [];
        foreach ($raw_alternatives as $candidate) {
            if (count($alternatives) >= 3) {
                break;
            }
            if (!is_scalar($candidate)) {
                continue;
            }
            $candidate = self::trim_to_word_count($candidate, 5);
            if (is_string($candidate) && $candidate !== '') {
                $alternatives[] = $candidate;
            }
        }

        return [
            'suggestion' => $suggestion,
            'alternatives' => $alternatives,
            'reason' => $reason,
            'error' => null,
        ];
    }

    /**
     * Generate anchor bank for a target page.
     * Returns anchor variations for each type: exact, partial, descriptive, contextual, generic
     *
     * @param string $primary_keyword The primary keyword for the page
     * @param string $secondary_keywords Comma-separated secondary keywords
     * @param string $meta_title Page meta title
     * @param string $meta_desc Page meta description
     * @return array ['exact' => [...], 'partial' => [...], 'descriptive' => [...], 'contextual' => [...], 'generic' => [...], 'error' => string|null]
     */
    public static function generate_anchor_bank($primary_keyword, $secondary_keywords = '', $meta_title = '', $meta_desc = '') {
        $key = self::api_key();
        if ($key === '') {
            return [
                'exact' => [],
                'partial' => [],
                'descriptive' => [],
                'contextual' => [],
                'generic' => [],
                'error' => 'Missing OpenAI API key',
            ];
        }

        $primary = trim((string)$primary_keyword);
        $secondary = trim((string)$secondary_keywords);
        $title = trim((string)$meta_title);
        $desc = trim((string)$meta_desc);

        if ($primary === '') {
            return [
                'exact' => [],
                'partial' => [],
                'descriptive' => [],
                'contextual' => [],
                'generic' => [],
                'error' => 'Missing primary keyword',
            ];
        }

        $system = "You are an SEO expert specializing in internal linking anchor text optimization. Return valid JSON only.";

        $user = "Generate a bank of anchor text variations for internal links pointing to a page.\n\n"
            . "Target page information:\n"
            . "- Primary keyword: {$primary}\n"
            . ($secondary !== '' ? "- Secondary keywords: {$secondary}\n" : '')
            . ($title !== '' ? "- Meta title: {$title}\n" : '')
            . ($desc !== '' ? "- Meta description: {$desc}\n" : '')
            . "\nGenerate anchor text variations for each type:\n\n"
            . "1. EXACT (3-5 variations): MUST contain ALL words from the primary keyword — no words removed. Allowed changes: reordering words in any order, singular/plural, adding 1 small connecting word (e.g. 'for', 'on', 'from'). Examples: primary 'amazon dog bed' → exact could be 'dog bed amazon', 'amazon dog beds', 'dog bed from amazon'. Primary 'human giant dog bed' → exact could be 'giant human dog bed' or 'human giant dog beds', but NOT 'human dog bed' (missing 'giant').\n"
            . "2. PARTIAL (4-6 variations): Semantic variations, synonyms, related phrases that convey the same meaning\n"
            . "3. DESCRIPTIVE (3-5 variations): Topic expansion phrases like 'complete guide to X', 'everything about X', 'X explained'\n"
            . "4. CONTEXTUAL (3-5 variations): Natural phrases that fit in sentences, e.g., 'when choosing X', 'if you need X', 'looking for X'\n"
            . "5. GENERIC (3-4 variations): Relevant but general anchors like 'this guide', 'our review', 'learn more', 'read here'\n\n"
            . "Rules:\n"
            . "- Each anchor should be 1-6 words\n"
            . "- Do not capitalize unless proper noun\n"
            . "- Make anchors natural and varied\n"
            . "- Avoid spammy or over-optimized text\n"
            . "- Generic anchors should still be contextually relevant\n\n"
            . "Return ONLY this JSON format:\n"
            . "{\n"
            . "  \"exact\": [\"anchor1\", \"anchor2\", ...],\n"
            . "  \"partial\": [\"anchor1\", \"anchor2\", ...],\n"
            . "  \"descriptive\": [\"anchor1\", \"anchor2\", ...],\n"
            . "  \"contextual\": [\"anchor1\", \"anchor2\", ...],\n"
            . "  \"generic\": [\"anchor1\", \"anchor2\", ...]\n"
            . "}\n";

        $payload = [
            'model' => 'gpt-4o-mini',
            'temperature' => 0.7,
            'messages' => [
                ['role' => 'system', 'content' => $system],
                ['role' => 'user', 'content' => $user],
            ],
            'response_format' => ['type' => 'json_object'],
        ];

        $res = wp_remote_post('https://api.openai.com/v1/chat/completions', [
            'timeout' => 30,
            'headers' => [
                'Authorization' => 'Bearer ' . $key,
                'Content-Type'  => 'application/json',
            ],
            'body' => wp_json_encode($payload),
        ]);

        if (is_wp_error($res)) {
            return [
                'exact' => [],
                'partial' => [],
                'descriptive' => [],
                'contextual' => [],
                'generic' => [],
                'error' => $res->get_error_message(),
            ];
        }

        $code = (int) wp_remote_retrieve_response_code($res);
        $body = (string) wp_remote_retrieve_body($res);

        if ($code < 200 || $code >= 300) {
            return [
                'exact' => [],
                'partial' => [],
                'descriptive' => [],
                'contextual' => [],
                'generic' => [],
                'error' => 'HTTP ' . $code . ': ' . substr($body, 0, 200),
            ];
        }

        $json = json_decode($body, true);
        $content = $json['choices'][0]['message']['content'] ?? '';

        if (!is_string($content) || trim($content) === '') {
            return [
                'exact' => [],
                'partial' => [],
                'descriptive' => [],
                'contextual' => [],
                'generic' => [],
                'error' => 'Empty response',
            ];
        }

        $out = json_decode($content, true);
        if (!is_array($out)) {
            return [
                'exact' => [],
                'partial' => [],
                'descriptive' => [],
                'contextual' => [],
                'generic' => [],
                'error' => 'Invalid JSON response',
            ];
        }

        // Extract and clean each type
        $types = ['exact', 'partial', 'descriptive', 'contextual', 'generic'];
        $result = ['error' => null];

        foreach ($types as $type) {
            $anchors = isset($out[$type]) && is_array($out[$type]) ? $out[$type] : [];
            $anchors = array_values(array_filter(array_map(function($a) {
                $a = trim((string)$a);
                return $a !== '' ? $a : null;
            }, $anchors)));
            // Limit and trim word count
            $anchors = array_slice($anchors, 0, 8);
            $anchors = array_map(function($a) {
                return self::trim_to_word_count($a, 6);
            }, $anchors);
            $result[$type] = $anchors;
        }

        return $result;
    }

    /**
     * Rank anchor relevance among multiple source-target candidates.
     * Used for anchor deduplication when multiple sources want the same anchor text.
     *
     * Candidates are presented to the model numbered by zero-based position, and the
     * returned winner_index is such a position. For a plain list (keys 0..n-1) the
     * position equals the array key.
     *
     * Every failure path returns winner_index 0 together with a non-null 'error'.
     * A numeric but out-of-range index from the model is reset to 0 without an error.
     *
     * @param string $anchor_text The contested anchor text
     * @param array $candidates Candidate entries; the keys read from each entry are
     *                          source_title, source_url, target_title, target_url and
     *                          relevance_score (each optional, defaulting to ''/0)
     * @return array ['winner_index' => int, 'reason' => string, 'error' => string|null]
     */
    public static function rank_anchor_relevance($anchor_text, $candidates) {
        $key = self::api_key();
        if ($key === '') {
            return ['winner_index' => 0, 'reason' => 'No API key, defaulting to highest relevance', 'error' => 'Missing API key'];
        }

        if (empty($candidates)) {
            return ['winner_index' => 0, 'reason' => 'No candidates', 'error' => 'Empty candidates list'];
        }

        // Build candidate descriptions. Number by position rather than by array key:
        // the index is later validated against the candidate count, so the two must
        // agree even when keys are non-sequential or non-numeric.
        $candidate_lines = [];
        $total = 0;
        foreach ($candidates as $c) {
            $candidate_lines[] = sprintf(
                "Candidate %d:\n  Source: \"%s\" (%s)\n  Target: \"%s\" (%s)\n  Relevance score: %d",
                $total,
                (string)($c['source_title'] ?? ''),
                (string)($c['source_url'] ?? ''),
                (string)($c['target_title'] ?? ''),
                (string)($c['target_url'] ?? ''),
                (int)($c['relevance_score'] ?? 0)
            );
            $total++;
        }

        $system = "You are an SEO analyst specializing in internal linking strategy. Return valid JSON only.";

        $user = "Multiple source pages could use the anchor text \"{$anchor_text}\" for an internal link. "
            . "Pick the best source-target pair where this anchor text is most natural and relevant.\n\n"
            . "Candidates:\n" . implode("\n\n", $candidate_lines) . "\n\n"
            . "Rules:\n"
            . "- Pick the candidate where the anchor text fits most naturally in the source content\n"
            . "- Consider semantic relevance between source topic and anchor text\n"
            . "- Consider how well the anchor describes the target page\n"
            . "- Higher relevance scores indicate better topical match\n\n"
            . "Return ONLY this JSON:\n"
            . "{\"winner_index\": 0, \"reason\": \"brief explanation\"}\n";

        $payload = [
            'model' => 'gpt-4o-mini',
            'temperature' => 0,
            'messages' => [
                ['role' => 'system', 'content' => $system],
                ['role' => 'user', 'content' => $user],
            ],
            'response_format' => ['type' => 'json_object'],
        ];

        $res = wp_remote_post('https://api.openai.com/v1/chat/completions', [
            'timeout' => 20,
            'headers' => [
                'Authorization' => 'Bearer ' . $key,
                'Content-Type'  => 'application/json',
            ],
            'body' => wp_json_encode($payload),
        ]);

        if (is_wp_error($res)) {
            return ['winner_index' => 0, 'reason' => '', 'error' => $res->get_error_message()];
        }

        $code = (int) wp_remote_retrieve_response_code($res);
        $body = (string) wp_remote_retrieve_body($res);

        if ($code < 200 || $code >= 300) {
            // Include the start of the response body so the failure cause is visible.
            return ['winner_index' => 0, 'reason' => '', 'error' => 'HTTP ' . $code . ': ' . substr($body, 0, 200)];
        }

        $json = json_decode($body, true);
        $content = $json['choices'][0]['message']['content'] ?? '';

        if (!is_string($content) || trim($content) === '') {
            return ['winner_index' => 0, 'reason' => '', 'error' => 'Empty response'];
        }

        $out = json_decode($content, true);
        // Require a numeric index: an (int) cast would silently turn a non-numeric
        // string into 0 and a non-empty array into 1, then report success.
        if (!is_array($out) || !isset($out['winner_index']) || !is_numeric($out['winner_index'])) {
            return ['winner_index' => 0, 'reason' => '', 'error' => 'Invalid JSON'];
        }

        $winner = (int)$out['winner_index'];

        // Only stringify scalar reasons; casting an array would emit a warning.
        $reason = (isset($out['reason']) && is_scalar($out['reason']))
            ? trim((string)$out['reason'])
            : '';

        // Validate winner index against the number of candidates that were sent.
        if ($winner < 0 || $winner >= $total) {
            $winner = 0;
        }

        return [
            'winner_index' => $winner,
            'reason' => $reason,
            'error' => null,
        ];
    }

    /**
     * Resolve keyword conflicts: given pages sharing the same primary keyword,
     * assign unique primary + secondary keywords to each page via AI.
     *
     * @param array $pages_array Page entries. Keys read from each entry: document_id, url, h1,
     *                           meta_title, meta_desc, current_primary, current_secondary and
     *                           (optionally) content_excerpt. Missing keys are rendered as
     *                           empty strings; array values are joined with ", ".
     * @return array ['assignments' => array, 'error' => string|null]
     *               'assignments' holds the array entries from the model's "assignments" list
     *               (non-array entries are dropped); their inner fields are not validated here.
     *               It is empty whenever 'error' is non-null.
     */
    public static function resolve_keyword_conflicts($pages_array) {
        $key = self::api_key();
        if ($key === '') {
            return ['assignments' => [], 'error' => 'Missing OpenAI API key'];
        }

        if (empty($pages_array)) {
            return ['assignments' => [], 'error' => null];
        }

        // Render one page field as text without triggering undefined-index or
        // array-to-string warnings.
        $field = static function ($page, $name) {
            $value = (is_array($page) && isset($page[$name])) ? $page[$name] : '';
            if (is_array($value)) {
                $value = implode(', ', array_filter($value, 'is_scalar'));
            }
            return is_scalar($value) ? (string)$value : '';
        };

        // Build the pages description. Pages are numbered with a counter rather than
        // "key + 1", which throws on string keys under PHP 8.
        $pages_desc = '';
        $page_no = 0;
        foreach ($pages_array as $p) {
            $page_no++;
            $pages_desc .= "Page {$page_no}:\n";
            $pages_desc .= "  document_id: " . $field($p, 'document_id') . "\n";
            $pages_desc .= "  URL: " . $field($p, 'url') . "\n";
            $pages_desc .= "  H1: " . $field($p, 'h1') . "\n";
            $pages_desc .= "  Meta Title: " . $field($p, 'meta_title') . "\n";
            $pages_desc .= "  Meta Description: " . $field($p, 'meta_desc') . "\n";
            $pages_desc .= "  Current Primary: " . $field($p, 'current_primary') . "\n";
            $pages_desc .= "  Current Secondary: " . $field($p, 'current_secondary') . "\n";
            if (is_array($p) && !empty($p['content_excerpt'])) {
                $pages_desc .= "  Content Excerpt: " . $field($p, 'content_excerpt') . "\n";
            }
            $pages_desc .= "\n";
        }

        $prompt = "You are an SEO keyword planner.\n\n" .
            "The following pages currently share the SAME primary keyword. " .
            "This is bad for SEO because each page should target a UNIQUE primary keyword.\n\n" .
            "Your task: assign a UNIQUE primary keyword and ONE secondary keyword to EACH page.\n\n" .
            "Rules:\n" .
            "1. No two pages can have the same primary keyword (case-insensitive).\n" .
            "2. The primary keyword must accurately reflect what the page is about (use URL, H1, Meta Title, content excerpt).\n" .
            "3. ONE page may keep the current primary keyword if it is the best fit for that keyword.\n" .
            "4. For the other pages, find a more specific or alternative primary keyword that still matches the page's actual topic.\n" .
            "5. Secondary keyword: one supporting term that complements the primary (category, feature, audience, etc.).\n" .
            "6. If the primary keyword contains a brand/entity name, preserve it in the new keyword when possible.\n" .
            "7. Keywords should be in Title Case (keep acronyms uppercase).\n" .
            "8. Primary: 2-6 words. Secondary: 1-5 words.\n\n" .
            "Pages:\n" . $pages_desc . "\n" .
            "Return ONLY valid JSON in this format:\n" .
            "{\n" .
            "  \"assignments\": [\n" .
            "    {\n" .
            "      \"document_id\": <number>,\n" .
            "      \"primary_keyword\": \"...\",\n" .
            "      \"secondary_keywords\": [\"...\"]\n" .
            "    }\n" .
            "  ]\n" .
            "}\n";

        $payload = [
            'model'       => 'gpt-4o-mini',
            'temperature' => 0.3,
            'messages'    => [
                ['role' => 'system', 'content' => 'You are an SEO keyword planner. Return valid JSON only.'],
                ['role' => 'user',   'content' => $prompt],
            ],
            'response_format' => ['type' => 'json_object'],
        ];

        $response = wp_remote_post('https://api.openai.com/v1/chat/completions', [
            'timeout' => 60,
            'headers' => [
                'Content-Type'  => 'application/json',
                'Authorization' => 'Bearer ' . $key,
            ],
            'body' => wp_json_encode($payload),
        ]);

        if (is_wp_error($response)) {
            return ['assignments' => [], 'error' => $response->get_error_message()];
        }

        $code = (int) wp_remote_retrieve_response_code($response);
        $raw  = (string) wp_remote_retrieve_body($response);

        // Report API failures (auth, rate limit, server error) as such, instead of
        // letting them fall through to the generic "invalid format" error below.
        if ($code < 200 || $code >= 300) {
            return ['assignments' => [], 'error' => 'HTTP ' . $code . ': ' . substr($raw, 0, 200)];
        }

        $data = json_decode($raw, true);
        $content = $data['choices'][0]['message']['content'] ?? '';

        if (!is_string($content) || trim($content) === '') {
            return ['assignments' => [], 'error' => 'Empty response'];
        }

        $out = json_decode($content, true);

        if (!is_array($out) || !isset($out['assignments']) || !is_array($out['assignments'])) {
            return ['assignments' => [], 'error' => 'Invalid AI response format'];
        }

        // Keep only array entries so callers can index each assignment safely.
        $assignments = array_values(array_filter($out['assignments'], 'is_array'));

        return ['assignments' => $assignments, 'error' => null];
    }

    /**
     * Ask AI to pick the best anchor text from options (original vs expanded).
     * Evaluates from SEO perspective - does adding context words make sense?
     *
     * The model's answer is accepted only if it matches one of the supplied options
     * (case-insensitive comparison via strcasecmp); the matching option is returned
     * exactly as it was passed in.
     *
     * @param string $original The original matched anchor
     * @param array $expansions Possible expanded versions
     * @param string $sentence The sentence context
     * @return string|null The chosen option; $original when $expansions is empty; null when the
     *                     API key is missing, the request fails, the HTTP status is not 2xx, or
     *                     the response does not name one of the options
     */
    public static function pick_best_anchor($original, $expansions, $sentence) {
        $key = self::api_key();
        if ($key === '') return null;

        if (empty($expansions)) return $original;

        // Build the numbered options list; the original anchor is always option 1.
        // A counter is used because array_merge preserves string keys.
        $options = array_merge([$original], $expansions);
        $options_text = '';
        $number = 0;
        foreach ($options as $opt) {
            $number++;
            $options_text .= $number . ". \"" . $opt . "\"\n";
        }

        $prompt = "You are an SEO expert evaluating anchor text options for an internal link.\n\n" .
            "Sentence: \"" . $sentence . "\"\n\n" .
            "Anchor text options:\n" . $options_text . "\n" .
            "Rules:\n" .
            "- The anchor should be a natural noun phrase that describes the link destination\n" .
            "- Adding adjectives/modifiers (e.g., 'comfortable', 'large', 'premium') is GOOD if they describe the product\n" .
            "- Adding verbs (e.g., 'offers', 'provides', 'creates') is BAD - anchors should be noun phrases\n" .
            "- Adding unrelated words that break the noun phrase is BAD\n" .
            "- Shorter is better if the longer version doesn't add descriptive value\n\n" .
            "Return ONLY valid JSON: {\"best\": \"the chosen anchor text\"}\n" .
            "Choose the best anchor for SEO.";

        $payload = [
            'model'       => 'gpt-4o-mini',
            'temperature' => 0.1,
            'max_tokens'  => 50,
            'messages'    => [
                ['role' => 'system', 'content' => 'You are an SEO expert. Return valid JSON only.'],
                ['role' => 'user',   'content' => $prompt],
            ],
            'response_format' => ['type' => 'json_object'],
        ];

        $response = wp_remote_post('https://api.openai.com/v1/chat/completions', [
            'timeout' => 15,
            'headers' => [
                'Content-Type'  => 'application/json',
                'Authorization' => 'Bearer ' . $key,
            ],
            'body' => wp_json_encode($payload),
        ]);

        if (is_wp_error($response)) {
            return null;
        }

        // Treat any non-2xx status as a failure rather than parsing an error payload.
        $code = (int) wp_remote_retrieve_response_code($response);
        if ($code < 200 || $code >= 300) {
            return null;
        }

        $data = json_decode((string) wp_remote_retrieve_body($response), true);
        $content = $data['choices'][0]['message']['content'] ?? '';
        if (!is_string($content) || trim($content) === '') {
            return null;
        }

        $out = json_decode($content, true);

        // 'best' must be a string: trim() throws a TypeError on arrays under PHP 8.
        if (!is_array($out) || !isset($out['best']) || !is_string($out['best'])) {
            return null;
        }

        // Verify the returned anchor is one of our options
        $best = trim($out['best']);
        foreach ($options as $opt) {
            if (strcasecmp($best, $opt) === 0) {
                return $opt; // Return the option with its original casing
            }
        }

        // No option matched: signal failure with null.
        return null;
    }
}
