"""Task configurations and graders for BazaarBot."""

from __future__ import annotations

from .models import DealOutcome, DealRecord, SellerPersonalityType, TaskConfig


# ── Task Definitions ──────────────────────────────────────────────

# Registry of task configurations, keyed by task name. Each key equals the
# entry's ``name`` field and has a matching grader entry in GRADERS below.
#
# Every task except "amazon_realistic" uses buyer_budget=100.0 and
# seller_cost=30.0, so the surplus the graders normalise against
# (buyer_budget - seller_cost) is 70.0 for those tasks.
TASKS: dict[str, TaskConfig] = {
    # Baseline single-episode task; graded by grade_single_deal.
    "single_deal": TaskConfig(
        name="single_deal",
        difficulty="easy",
        description=(
            "Buyer negotiates one deal. Symmetric information. No career history. "
            "Seller concedes at moderate rate."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.08,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.3,
    ),
    # buyer_deadline=5 is the round count grade_asymmetric_pressure compares
    # rounds_taken against; deals closed later are scored at half weight.
    "asymmetric_pressure": TaskConfig(
        name="asymmetric_pressure",
        difficulty="medium",
        description=(
            "Buyer has hidden hard deadline at round 5. Seller has hidden inventory pressure. "
            "Agent must infer seller urgency from offer velocity and close before deadline."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.06,
        buyer_deadline=5,
        seller_inventory=5,
        seller_batna_probability=0.08,
        enable_career=False,
        success_threshold=0.4,
    ),
    # max_steps=80 over total_episodes=10: grade_career_10 derives a per-episode
    # round budget of 8 from max_steps // total_episodes.
    "career_10": TaskConfig(
        name="career_10",
        difficulty="hard",
        description=(
            "Buyer plays 10 consecutive deals against same seller. Career history active. "
            "Seller adapts concession rate based on buyer's historical capitulation rate. "
            "Agent must manage reputation across episodes."
        ),
        max_steps=80,
        total_episodes=10,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.07,
        buyer_deadline=None,
        seller_inventory=10,
        seller_batna_probability=0.1,
        enable_career=True,
        success_threshold=0.5,
    ),
    # ── New personality-based tasks ──────────────────────────────
    # The entries below additionally set seller_personality and enable_tells.
    # NOTE(review): descriptions mention anchor adjustments (e.g. "15% higher",
    # "lower anchor") while seller_anchor_multiplier stays 2.0 -- presumably the
    # personality applies the adjustment elsewhere; confirm against the seller code.
    "deceptive_seller": TaskConfig(
        name="deceptive_seller",
        difficulty="hard",
        description=(
            "Seller bluffs about demand, fakes urgency, anchors 15% higher. "
            "Tells leak deception cues -- verbal over-justification, fidgeting, "
            "erratic concessions. Agent must read through the bluffs."
        ),
        max_steps=10,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.06,
        buyer_deadline=None,
        seller_inventory=3,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.35,
        seller_personality=SellerPersonalityType.DECEPTIVE,
        enable_tells=True,
    ),
    "impatient_seller": TaskConfig(
        name="impatient_seller",
        difficulty="medium",
        description=(
            "Seller concedes fast but walks fast. Shorter patience window. "
            "Agent must close quickly or risk losing the deal. "
            "Front-loaded concession pattern is the key tell."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.08,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.15,
        enable_career=False,
        success_threshold=0.3,
        seller_personality=SellerPersonalityType.IMPATIENT,
        enable_tells=True,
    ),
    "collaborative_seller": TaskConfig(
        name="collaborative_seller",
        difficulty="easy",
        description=(
            "Seller seeks fair deals, concedes toward midpoint. Lower anchor, "
            "tighter margins. Agent should reciprocate to maximize joint surplus. "
            "Tests whether agent adapts to cooperative opponents."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.10,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.02,
        enable_career=False,
        success_threshold=0.4,
        seller_personality=SellerPersonalityType.COLLABORATIVE,
        enable_tells=True,
    ),
    # Multi-episode (5) deceptive task; graded by grade_read_the_tells, which
    # adds a bonus for closing below the budget/cost midpoint (65.0 here).
    "read_the_tells": TaskConfig(
        name="read_the_tells",
        difficulty="expert",
        description=(
            "Deceptive seller with strong tells. Agent gets bonus score for "
            "exploiting tells -- closing below midpoint when deception cues are high "
            "indicates the agent read the bluff. Game theory meets poker."
        ),
        max_steps=10,
        total_episodes=5,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.2,
        seller_concession_rate=0.05,
        buyer_deadline=None,
        seller_inventory=5,
        seller_batna_probability=0.08,
        enable_career=True,
        success_threshold=0.45,
        seller_personality=SellerPersonalityType.DECEPTIVE,
        enable_tells=True,
    ),
    # Only task that sets num_buyers / enable_coalition; scored with the
    # generic grade_personality_task (see GRADERS).
    "marketplace_arena": TaskConfig(
        name="marketplace_arena",
        difficulty="expert",
        description=(
            "Multi-buyer marketplace: 2-3 buyers compete for the same item from one seller. "
            "Buyers can signal cooperation or competition. "
            "Seller plays buyers against each other. Facebook Marketplace dynamics."
        ),
        max_steps=12,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.06,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.3,
        seller_personality=SellerPersonalityType.DEFAULT,
        enable_tells=True,
        num_buyers=3,
        enable_coalition=True,
    ),
    # Only task that sets use_real_listings; its grader reads the record's
    # normalized_surplus instead of the budget/cost values configured here.
    "amazon_realistic": TaskConfig(
        name="amazon_realistic",
        difficulty="medium",
        description=(
            "Single-deal negotiation over a real Amazon listing. Item, MRP, and "
            "street price sampled per episode from data/amazon.csv. "
            "Forces generalization across product categories and price magnitudes."
        ),
        max_steps=8,
        total_episodes=1,
        # buyer_budget / seller_cost are ignored when use_real_listings=True;
        # kept here as fallbacks if the CSV is missing on the runtime.
        buyer_budget=1000.0,
        seller_cost=400.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.08,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.3,
        seller_personality=SellerPersonalityType.DEFAULT,
        enable_tells=True,
        use_real_listings=True,
    ),
}


# ── Graders ───────────────────────────────────────────────────────

def grade_single_deal(results: list[DealRecord], task: TaskConfig) -> float:
    """Score one negotiation by the share of the available surplus the buyer kept.

    Only the first record in ``results`` is graded. The raw score is
    ``(buyer_budget - agreed_price) / (buyer_budget - seller_cost)``.

    Args:
        results: Deal records for the run; entries after the first are ignored.
        task: Task configuration supplying ``buyer_budget`` and ``seller_cost``.

    Returns:
        A score clamped to ``[0.0, 1.0]``. It is 0.0 when there are no results,
        the outcome is not a deal, no price was agreed, or the task leaves no
        positive surplus to split.
    """
    if not results:
        return 0.0

    first = results[0]
    closed_with_price = (
        first.outcome == DealOutcome.DEAL and first.agreed_price is not None
    )
    if not closed_with_price:
        return 0.0

    # Total room between what the buyer can pay and what the item costs the seller.
    available = task.buyer_budget - task.seller_cost
    if available <= 0:
        return 0.0

    kept = task.buyer_budget - first.agreed_price
    return max(0.0, min(1.0, kept / available))


def grade_asymmetric_pressure(results: list[DealRecord], task: TaskConfig) -> float:
    """Score a deadline-constrained negotiation: surplus share times a timing factor.

    Only the first record in ``results`` is graded.

    Args:
        results: Deal records for the run; entries after the first are ignored.
        task: Task configuration supplying ``buyer_budget``, ``seller_cost``
            and ``buyer_deadline``.

    Returns:
        A score clamped to ``[0.0, 1.0]``. It is 0.0 when there are no results,
        the outcome is a walk or an expiry, or no price was agreed. Otherwise
        it is the buyer's share of the surplus, halved when the deal took more
        rounds than the deadline.
    """
    if not results:
        return 0.0

    record = results[0]
    if record.outcome in (DealOutcome.WALK, DealOutcome.EXPIRED):
        return 0.0
    if record.agreed_price is None:
        return 0.0

    available = task.buyer_budget - task.seller_cost
    if available > 0:
        share = max(0.0, (task.buyer_budget - record.agreed_price) / available)
    else:
        share = 0.0

    # A falsy deadline (None or 0) falls back to 5 rounds.
    round_limit = task.buyer_deadline or 5
    # Despite the "bonus" idea, this is a multiplier: full credit on time,
    # half credit once the deadline has passed.
    if record.rounds_taken <= round_limit:
        timing_factor = 1.0
    else:
        timing_factor = 0.5

    return max(0.0, min(1.0, share * timing_factor))


def grade_career_10(results: list[DealRecord], task: TaskConfig) -> float:
    """Score a multi-episode career as the mean of efficiency-weighted surplus shares.

    Each closed deal contributes ``norm_surplus * efficiency`` where
    ``norm_surplus`` is the buyer's share of ``buyer_budget - seller_cost``
    (floored at 0) and ``efficiency`` is ``1 - 0.3 * rounds_taken / rounds_per_ep``
    (floored at 0). Episodes that did not close with a price contribute 0.
    The mean is taken over the records supplied in ``results``.

    Args:
        results: One deal record per played episode.
        task: Task configuration supplying ``buyer_budget``, ``seller_cost``,
            ``max_steps`` and ``total_episodes``.

    Returns:
        The mean episode score clamped to ``[0.0, 1.0]``; 0.0 when ``results``
        is empty or the task leaves no positive surplus to split.
    """
    if not results:
        return 0.0

    max_surplus = task.buyer_budget - task.seller_cost
    if max_surplus <= 0:
        # Every episode would normalise to 0, so the mean is 0 as well.
        return 0.0

    # Per-episode round budget. Both divisors are guarded: total_episodes may
    # be 0, and max_steps < total_episodes would floor the budget to 0 and
    # make the efficiency division below raise ZeroDivisionError.
    rounds_per_ep = max(1, task.max_steps // max(1, task.total_episodes))

    total = 0.0
    for deal in results:
        if deal.outcome != DealOutcome.DEAL or deal.agreed_price is None:
            continue  # failed episodes add 0 but still count in the mean

        norm_surplus = max(0.0, (task.buyer_budget - deal.agreed_price) / max_surplus)
        # Using the whole round budget costs 30% of the episode's score.
        efficiency = max(0.0, 1.0 - (deal.rounds_taken / rounds_per_ep) * 0.3)
        total += norm_surplus * efficiency

    return max(0.0, min(1.0, total / len(results)))


def grade_personality_task(results: list[DealRecord], task: TaskConfig) -> float:
    """Generic grader for personality tasks: mean per-episode surplus share.

    Each record is scored as the buyer's share of
    ``buyer_budget - seller_cost`` (floored at 0); records that did not close
    with a price score 0.

    Args:
        results: One deal record per played episode.
        task: Task configuration supplying ``buyer_budget`` and ``seller_cost``.

    Returns:
        The mean of the per-record scores clamped to ``[0.0, 1.0]``; 0.0 when
        ``results`` is empty.
    """
    if not results:
        return 0.0

    def _episode_share(record: DealRecord) -> float:
        # Surplus share for one record; 0 for anything that is not a priced deal.
        if record.outcome != DealOutcome.DEAL or record.agreed_price is None:
            return 0.0
        available = task.buyer_budget - task.seller_cost
        if available > 0:
            return max(0.0, (task.buyer_budget - record.agreed_price) / available)
        return 0.0

    shares = [_episode_share(record) for record in results]
    mean_share = sum(shares) / max(len(shares), 1)
    return max(0.0, min(1.0, mean_share))


def grade_read_the_tells(results: list[DealRecord], task: TaskConfig) -> float:
    """Score deals by surplus share, with a bonus for closing below the midpoint.

    Each closed deal is scored as the buyer's share of
    ``buyer_budget - seller_cost`` (floored at 0). A deal that closes below
    the midpoint of budget and cost earns up to 0.15 extra, scaled by how far
    below the midpoint it landed relative to the midpoint-to-cost distance;
    the per-deal score is then capped at 1.0. Records that did not close with
    a price score 0.

    Note: the bonus depends only on the agreed price; this grader does not
    inspect any deception-cue data.

    Args:
        results: One deal record per played episode.
        task: Task configuration supplying ``buyer_budget`` and ``seller_cost``.

    Returns:
        The mean per-deal score clamped to ``[0.0, 1.0]``; 0.0 when ``results``
        is empty or the task leaves no positive surplus to split.
    """
    if not results:
        return 0.0

    max_surplus = task.buyer_budget - task.seller_cost
    if max_surplus <= 0:
        # No room to negotiate: every share is 0, and the bonus denominator
        # (midpoint - seller_cost) would be zero or negative.
        return 0.0

    midpoint = (task.buyer_budget + task.seller_cost) / 2
    bonus_range = midpoint - task.seller_cost
    scores = []

    for deal in results:
        if deal.outcome != DealOutcome.DEAL or deal.agreed_price is None:
            scores.append(0.0)
            continue

        norm = max(0.0, (task.buyer_budget - deal.agreed_price) / max_surplus)

        # Bonus for closing below midpoint (reading the bluff). bonus_range is
        # re-checked because float rounding can collapse it to 0 for a tiny surplus.
        if bonus_range > 0 and deal.agreed_price < midpoint:
            bluff_bonus = 0.15 * ((midpoint - deal.agreed_price) / bonus_range)
            norm = min(1.0, norm + bluff_bonus)

        scores.append(norm)

    return max(0.0, min(1.0, sum(scores) / len(scores)))


def grade_amazon_realistic(results: list[DealRecord], task: TaskConfig) -> float:
    """Grade a real-listing episode from the record's own ``normalized_surplus``.

    Only the first record in ``results`` is graded, and ``task`` is not
    consulted. Per the original author's note, ``normalized_surplus`` is
    computed from the seller's per-episode cost rather than the task's default
    cost; that computation is outside this function.

    Returns:
        ``normalized_surplus`` clamped to ``[0.0, 1.0]``, or 0.0 when there are
        no results or the first outcome is not a deal.
    """
    if not results or results[0].outcome != DealOutcome.DEAL:
        return 0.0

    share = results[0].normalized_surplus
    return max(0.0, min(1.0, share))


# Dispatch table from task name to grader function. The keys are the same
# names used as keys in TASKS. Every grader takes (results, task) and returns
# a float clamped to [0.0, 1.0].
GRADERS = {
    "single_deal": grade_single_deal,
    "asymmetric_pressure": grade_asymmetric_pressure,
    "career_10": grade_career_10,
    # The three single-personality tasks and the marketplace task share the
    # generic mean-surplus grader.
    "deceptive_seller": grade_personality_task,
    "impatient_seller": grade_personality_task,
    "collaborative_seller": grade_personality_task,
    "read_the_tells": grade_read_the_tells,
    "marketplace_arena": grade_personality_task,
    "amazon_realistic": grade_amazon_realistic,
}