Spaces:

PayMyBills
/

BazaarBATNA

Running

paymybills committed on Apr 26

Commit

2d8d503

1 Parent(s): 8a342b0

Sync server, bazaarbot_env, and nlp modules from main

Brings in 25+ commits of server-side improvements that have been
landing in the GitHub repo since the Space last synced on Apr 16:

- Live HF Inference Endpoint backend (server/sauda_buyer.py) with
Ollama fallback, dual-backend health probe.
- Safety module: per-IP rate limit, daily cap, concurrency cap,
circuit breaker, prompt-size cap (server/safety.py).
- /highlight endpoint for span-level seller-tell extraction
(nlp/keyword_patterns.py with English numeric deception cues
added today).
- LLMSeller persona implementation (bazaarbot_env/llm_seller.py)
using Gemma-4-E4B; auto-accept-when-offer >= reservation;
monotonic counter logic.
- gym_wrapper steering with monotonicity guard fix from today
(max(own_last_offer, ...) shape so ceiling regression can't drag
the buyer backward).

Updates Dockerfile to COPY bazaarbot_env/ and nlp/ alongside server/.

Files changed (19) hide show

Dockerfile +2 -0
bazaarbot_env/__init__.py +72 -0
bazaarbot_env/environment.py +543 -0
bazaarbot_env/gym_wrapper.py +539 -0
bazaarbot_env/listings.py +118 -0
bazaarbot_env/llm_seller.py +453 -0
bazaarbot_env/models.py +246 -0
bazaarbot_env/seller.py +437 -0
bazaarbot_env/tasks.py +336 -0
nlp/__init__.py +3 -0
nlp/eval_extractor.py +162 -0
nlp/extractor.py +326 -0
nlp/fetch_datasets.py +361 -0
nlp/keyword_patterns.py +149 -0
nlp/setup_ministral.sh +53 -0
nlp/templates.py +185 -0
server/main.py +195 -16
server/safety.py +236 -0
server/sauda_buyer.py +289 -0

Dockerfile CHANGED Viewed

@@ -6,6 +6,8 @@ COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY server/ ./server/
 COPY openenv.yaml .
 RUN mkdir -p /app/data

 RUN pip install --no-cache-dir -r requirements.txt
 COPY server/ ./server/
+COPY bazaarbot_env/ ./bazaarbot_env/
+COPY nlp/ ./nlp/
 COPY openenv.yaml .
 RUN mkdir -p /app/data

bazaarbot_env/__init__.py ADDED Viewed

	@@ -0,0 +1,72 @@

+"""Standalone, training-ready BazaarBot negotiation environment.
+This package is a thin re-export of the core env (`models`, `seller`,
+`environment`, `tasks`) plus a training-oriented wrapper:
+    from bazaarbot_env import BazaarGymEnv, rollout_episode
+It is importable without FastAPI, uvicorn, or any of the serving stack —
+designed to vendor cleanly into a Kaggle notebook or standalone training job.
+Usage:
+    env = BazaarGymEnv(task_name="single_deal", seed=42)
+    obs, _ = env.reset()
+    while not env.done:
+        action = policy(obs)          # policy returns dict: {"action": ..., "price": ...}
+        obs, reward, done, info = env.step(action)
+For GRPO-style training over multiple rollouts, use `rollout_episode`.
+"""
+from .models import (
+    ActionType,
+    BazaarAction,
+    BazaarObservation,
+    BazaarReward,
+    CareerHistory,
+    DealOutcome,
+    DealRecord,
+    EnvironmentState,
+    SellerPersonalityType,
+    TaskConfig,
+    TellObservation,
+)
+from .environment import BazaarEnvironment
+from .seller import SellerPersonality, SellerState, SellerTell
+from .tasks import GRADERS, TASKS
+from .gym_wrapper import (
+    DEFAULT_SYSTEM_PROMPT,
+    BazaarGymEnv,
+    format_observation,
+    parse_action,
+    rollout_episode,
+    steer_bayesian_action,
+    strip_think_tags,
+)
+# Public API of the package: every name imported above is re-exported here,
+# so `from bazaarbot_env import *` exposes the core env types, the task
+# registry (TASKS / GRADERS) and the training wrapper helpers.
+__all__ = [
+    "ActionType",
+    "BazaarAction",
+    "BazaarEnvironment",
+    "BazaarGymEnv",
+    "BazaarObservation",
+    "BazaarReward",
+    "CareerHistory",
+    "DealOutcome",
+    "DealRecord",
+    "DEFAULT_SYSTEM_PROMPT",
+    "EnvironmentState",
+    "GRADERS",
+    "SellerPersonality",
+    "SellerPersonalityType",
+    "SellerState",
+    "SellerTell",
+    "TASKS",
+    "TaskConfig",
+    "TellObservation",
+    "format_observation",
+    "parse_action",
+    "rollout_episode",
+    "steer_bayesian_action",
+    "strip_think_tags",
+]

bazaarbot_env/environment.py ADDED Viewed

	@@ -0,0 +1,543 @@

+"""Core BazaarBot negotiation environment."""
+from __future__ import annotations
+import copy
+import math
+import random
+from typing import Optional
+from .models import (
+    ActionType,
+    BazaarAction,
+    BazaarObservation,
+    BazaarReward,
+    CareerHistory,
+    DealOutcome,
+    DealRecord,
+    EnvironmentState,
+    SellerPersonalityType,
+    TaskConfig,
+    TellObservation,
+)
+from .seller import SellerPersonality, SellerState, SellerTell
+def _tell_to_model(
+    tell: SellerTell | None,
+    message: str = "",
+    history: list[str] | None = None,
+    use_nlp: bool = True,
+) -> TellObservation | None:
+    """Convert a rule-based ``SellerTell`` into a ``TellObservation``.
+    Args:
+        tell: Rule-based tell produced by the seller, or ``None``.
+        message: Seller utterance handed to the NLP extractor.
+        history: Recent conversation lines passed to the extractor as context.
+        use_nlp: When true (and ``message`` is non-empty) the NLP extractor is
+            tried; any failure falls back to the rule-based values.
+    Returns:
+        ``None`` when ``tell`` is ``None``; otherwise a ``TellObservation`` in
+        which verbal scores are a weighted blend of rule-based and NLP values,
+        body-language fields come straight from ``tell``, and condition fields
+        come from NLP (defaults ``1.0`` / ``0.0`` / ``"unknown"`` without it).
+    """
+    if tell is None:
+        return None
+    # NLP layer: extract language-based signals from the seller utterance.
+    # Rule-based body-language tells (fidget, posture, eye_contact) are kept
+    # from seller.py — NLP fills verbal and condition dimensions.
+    nlp_verbal: dict = {}
+    nlp_condition: dict = {}
+    if use_nlp and message:
+        try:
+            # Imported lazily so the env stays importable without the nlp package.
+            from nlp.extractor import TellExtractor
+            extracted = TellExtractor().extract(message, history=history, fast=False)
+            nlp_verbal = {
+                "verbal_urgency": extracted["verbal_urgency"],
+                "verbal_confidence": extracted["verbal_confidence"],
+                "verbal_deception_cue": extracted["verbal_deception_cue"],
+                "emotional_escalation": extracted["emotional_escalation"],
+                "offer_speed": extracted["offer_speed"],
+                "concession_pattern": extracted["concession_pattern"],
+            }
+            nlp_condition = {
+                "condition_score": extracted["condition_score"],
+                "depreciation_score": extracted["depreciation_score"],
+                "condition_label": extracted["condition_label"],
+            }
+        except Exception:
+            # Best-effort boundary: extractor unavailable or backend down.
+            # Fall back to rule-based tells, but leave a trace instead of
+            # swallowing the failure silently.
+            import logging
+            logging.getLogger(__name__).debug(
+                "NLP tell extraction failed; using rule-based tells only",
+                exc_info=True,
+            )
+    # Blend: NLP verbal signals averaged with rule-based where both exist.
+    # Rule-based is ground truth for non-verbal (fidget, posture, eye_contact).
+    # NLP takes precedence for condition since rule code has no condition signal.
+    def _blend(rule_val: float, nlp_val: float | None, nlp_weight: float = 0.55) -> float:
+        if nlp_val is None:
+            return rule_val
+        return round(rule_val * (1 - nlp_weight) + nlp_val * nlp_weight, 3)
+    return TellObservation(
+        verbal_urgency=_blend(tell.verbal_urgency, nlp_verbal.get("verbal_urgency")),
+        verbal_confidence=_blend(tell.verbal_confidence, nlp_verbal.get("verbal_confidence")),
+        verbal_deception_cue=_blend(tell.verbal_deception_cue, nlp_verbal.get("verbal_deception_cue")),
+        price_rounding=tell.price_rounding,
+        offer_speed=nlp_verbal.get("offer_speed", tell.offer_speed),
+        concession_pattern=nlp_verbal.get("concession_pattern", tell.concession_pattern),
+        fidget_level=round(tell.fidget_level, 3),
+        eye_contact=tell.eye_contact,
+        posture=tell.posture,
+        repeat_phrases=tell.repeat_phrases,
+        topic_changes=tell.topic_changes,
+        emotional_escalation=_blend(tell.emotional_escalation, nlp_verbal.get("emotional_escalation")),
+        condition_score=nlp_condition.get("condition_score", 1.0),
+        depreciation_score=nlp_condition.get("depreciation_score", 0.0),
+        condition_label=nlp_condition.get("condition_label", "unknown"),
+    )
+class BazaarEnvironment:
+    """Buyer-side negotiation environment implementing step/reset/state.
+    One instance plays ``task.total_episodes`` episodes.  ``reset()`` starts
+    the next episode with a fresh ``SellerState``, ``step()`` applies one buyer
+    action and returns ``(observation, reward)``, and ``get_state()`` exposes
+    the current bookkeeping as an ``EnvironmentState``.
+    """
+    def __init__(self, task: TaskConfig, seed: Optional[int] = None):
+        """Set up bookkeeping for ``task``; no seller exists until ``reset()``."""
+        self.task = task
+        self.rng = random.Random(seed)
+        if seed is not None:
+            # Also seeds the module-level RNG (a side effect outside this object).
+            random.seed(seed)
+        # Episode tracking
+        self.current_episode = 0
+        self.total_episodes = task.total_episodes
+        self.career_history = CareerHistory()
+        # Per-episode state
+        self.seller: Optional[SellerState] = None
+        self.current_round = 0
+        self.done = False
+        self.buyer_budget = task.buyer_budget
+        self.remaining_bankroll = task.buyer_budget * task.total_episodes
+        self.offer_history: list[dict] = []
+        self.cumulative_reward = 0.0
+        self.step_rewards: list[float] = []
+        self.tells_history: list[TellObservation] = []
+        # Item being negotiated this episode; set by reset() so every
+        # observation of the episode reports the same name.
+        self._current_item: Optional[str] = None
+        # Stalling detection
+        self._repeated_offers = 0
+        self._last_buyer_offer: Optional[float] = None
+        # Episode results for career grading
+        self.episode_results: list[DealRecord] = []
+        # Snapshot for counterfactual replay
+        self._snapshots: dict[int, dict] = {}
+        # Items for variety
+        self._items = [
+            "handwoven silk scarf", "brass table lamp", "leather messenger bag",
+            "ceramic tea set", "sandalwood incense box", "hand-painted pottery",
+            "embroidered cushion cover", "copper water bottle", "jute tote bag",
+            "wooden chess set",
+        ]
+    def _snapshot(self):
+        """Save a snapshot of environment state for counterfactual replay.
+        Keyed by ``current_round``.  Only the fields listed below are captured;
+        career history, tells history and bankroll are not part of a snapshot.
+        """
+        self._snapshots[self.current_round] = {
+            "seller": copy.deepcopy(self.seller),
+            "offer_history": copy.deepcopy(self.offer_history),
+            "done": self.done,
+            "cumulative_reward": self.cumulative_reward,
+            "step_rewards": list(self.step_rewards),
+            "repeated_offers": self._repeated_offers,
+            "last_buyer_offer": self._last_buyer_offer,
+            "current_round": self.current_round,
+        }
+    def restore_snapshot(self, round_num: int) -> bool:
+        """Restore environment to state at given round. Returns False if no snapshot."""
+        snap = self._snapshots.get(round_num)
+        if snap is None:
+            return False
+        # Deep-copy on the way out too, so the stored snapshot can be replayed again.
+        self.seller = copy.deepcopy(snap["seller"])
+        self.offer_history = copy.deepcopy(snap["offer_history"])
+        self.done = snap["done"]
+        self.cumulative_reward = snap["cumulative_reward"]
+        self.step_rewards = list(snap["step_rewards"])
+        self._repeated_offers = snap["repeated_offers"]
+        self._last_buyer_offer = snap["last_buyer_offer"]
+        self.current_round = snap["current_round"]
+        return True
+    def reset(self) -> BazaarObservation:
+        """Start the next episode and return its opening observation.
+        Increments ``current_episode``, clears per-episode state, builds a new
+        seller, and records the seller's opening ask as round 0.
+        """
+        self.current_episode += 1
+        self.current_round = 0
+        self.done = False
+        self.offer_history = []
+        self.step_rewards = []
+        self.tells_history = []
+        self._repeated_offers = 0
+        self._last_buyer_offer = None
+        self._snapshots = {}
+        # Map personality enum
+        personality = SellerPersonality(self.task.seller_personality.value)
+        # Per-episode listing: sample from real dataset when enabled, else
+        # fall back to the task's static cost/budget + hardcoded items list.
+        listing = None
+        if self.task.use_real_listings:
+            from .listings import sample_listing
+            listing = sample_listing(self.rng)
+        if listing is not None:
+            episode_cost = listing["seller_cost"]
+            episode_anchor = listing["seller_anchor"]
+            self.buyer_budget = listing["buyer_budget"]
+            item = listing["name"]
+        else:
+            episode_cost = self.task.seller_cost
+            episode_anchor = self.task.seller_cost * self.task.seller_anchor_multiplier
+            # Restore the task budget so a budget taken from an earlier
+            # listing-based episode does not leak into this one.
+            self.buyer_budget = self.task.buyer_budget
+            item = self._items[(self.current_episode - 1) % len(self._items)]
+        # Remember the item so later observations agree with the opening one.
+        self._current_item = item
+        # Create seller for this episode
+        self.seller = SellerState(
+            cost=episode_cost,
+            anchor=episode_anchor,
+            base_concession_rate=self.task.seller_concession_rate,
+            inventory=self.task.seller_inventory,
+            initial_inventory=self.task.seller_inventory,
+            batna_probability=self.task.seller_batna_probability,
+            # Multi-episode tasks split the step budget evenly across episodes.
+            max_rounds=self.task.max_steps if self.task.total_episodes == 1 else self.task.max_steps // self.task.total_episodes,
+            personality=personality,
+            _rng=self.rng,
+        )
+        # Career mode: update seller with buyer history
+        if self.task.enable_career and self.career_history.deals:
+            self.seller.update_career_info(self.career_history.capitulation_rate)
+        from .seller import _pick_message
+        # Use this episode's cost (listing-aware) rather than the static task cost.
+        open_msg = _pick_message(
+            personality, "open", self.rng,
+            item=item, price=self.seller.anchor, cost=episode_cost,
+        )
+        obs = BazaarObservation(
+            current_round=0,
+            max_rounds=self.seller.max_rounds,
+            own_last_offer=None,
+            opponent_last_offer=self.seller.anchor,
+            own_private_deadline=self.task.buyer_deadline,
+            own_private_budget=self.buyer_budget,
+            rounds_remaining=self.seller.max_rounds,
+            seller_last_move_delta=None,
+            item_name=item,
+            seller_asking_price=self.seller.anchor,
+            seller_personality=self.task.seller_personality,
+            episode_number=self.current_episode,
+            total_episodes=self.total_episodes,
+            career_history=self.career_history if self.task.enable_career else None,
+            done=False,
+            message=f'Seller opens: "{open_msg}"',
+        )
+        self.offer_history.append({
+            "round": 0,
+            "actor": "seller",
+            "action": "open",
+            "price": self.seller.anchor,
+        })
+        self._snapshot()
+        return obs
+    def step(self, action: BazaarAction) -> tuple[BazaarObservation, BazaarReward]:
+        """Process buyer action and return new observation + reward.
+        Offers are validated first: a missing price defaults to half the
+        budget, out-of-range prices are clamped (penalty -0.2), and a third
+        consecutive near-identical offer incurs a stalling penalty (-0.1).
+        ``action.price`` is modified in place when defaulted or clamped.
+        """
+        if self.done:
+            obs = self._make_obs(message="Negotiation already concluded.")
+            obs.done = True
+            return obs, BazaarReward(reward=0.0, terminal=True)
+        # Snapshot the pre-action state under the current round number.
+        self._snapshot()
+        self.current_round += 1
+        reward_components: dict[str, float] = {}
+        penalty = 0.0
+        # Validate action
+        if action.action == ActionType.OFFER:
+            if action.price is None:
+                action.price = self.buyer_budget * 0.5
+            if action.price < 0 or action.price > self.buyer_budget:
+                penalty -= 0.2
+                reward_components["out_of_range_penalty"] = -0.2
+                action.price = max(0, min(action.price, self.buyer_budget))
+            # Offers within 0.5 of the previous one count as a repeat.
+            if self._last_buyer_offer is not None and abs(action.price - self._last_buyer_offer) < 0.5:
+                self._repeated_offers += 1
+                if self._repeated_offers >= 3:
+                    penalty -= 0.1
+                    reward_components["stalling_penalty"] = -0.1
+            else:
+                self._repeated_offers = 0
+            self._last_buyer_offer = action.price
+        # Record buyer action
+        self.offer_history.append({
+            "round": self.current_round,
+            "actor": "buyer",
+            "action": action.action.value,
+            "price": action.price,
+        })
+        # Process action
+        if action.action == ActionType.WALK:
+            return self._handle_walk(reward_components, penalty)
+        elif action.action == ActionType.ACCEPT:
+            return self._handle_accept(reward_components, penalty)
+        else:
+            return self._handle_offer(action.price, reward_components, penalty)
+    def _handle_walk(self, components: dict, penalty: float) -> tuple[BazaarObservation, BazaarReward]:
+        """End the episode because the buyer walked away (fixed -0.3 penalty)."""
+        self.done = True
+        walk_penalty = -0.3
+        components["walk_penalty"] = walk_penalty
+        total = walk_penalty + penalty
+        self._record_deal(DealOutcome.WALK, None, self.current_round)
+        obs = self._make_obs(message="You walk away from the deal.")
+        obs.done = True
+        obs.deal_outcome = DealOutcome.WALK
+        reward = BazaarReward(reward=total, terminal=True, components=components)
+        self.step_rewards.append(total)
+        self.cumulative_reward += total
+        return obs, reward
+    def _handle_accept(self, components: dict, penalty: float) -> tuple[BazaarObservation, BazaarReward]:
+        """Accept the seller's current offer, or penalise an accept with nothing to accept."""
+        if self.seller is None or not self.seller.offer_history:
+            # Non-terminal: the buyer may still make an offer afterwards.
+            obs = self._make_obs(message="No seller offer to accept yet. Make an offer first.")
+            reward = BazaarReward(reward=-0.1 + penalty, terminal=False, components={"invalid_accept": -0.1})
+            self.step_rewards.append(reward.reward)
+            self.cumulative_reward += reward.reward
+            return obs, reward
+        agreed_price = self.seller.current_offer
+        return self._finalize_deal(agreed_price, components, penalty, buyer_accepted=True)
+    def _handle_offer(self, price: float, components: dict, penalty: float) -> tuple[BazaarObservation, BazaarReward]:
+        """Forward the buyer's offer to the seller and handle accept / walk / counter."""
+        assert self.seller is not None
+        seller_action, seller_price, tell, msg = self.seller.respond(price, self.current_round)
+        # Build conversation history for NLP context (last 4 turns)
+        recent_history = [
+            f"{h['actor']}: {h.get('price', '')}" for h in self.offer_history[-4:]
+        ]
+        # Record tell — NLP layer blends language signals into rule-based tells
+        use_nlp = getattr(self.task, "enable_nlp", False)
+        tell_model = _tell_to_model(tell, message=msg, history=recent_history, use_nlp=use_nlp)
+        if tell_model and self.task.enable_tells:
+            self.tells_history.append(tell_model)
+        if seller_action == "accept":
+            self.offer_history.append({
+                "round": self.current_round,
+                "actor": "seller",
+                "action": "accept",
+                "price": price,
+            })
+            return self._finalize_deal(price, components, penalty, buyer_accepted=False, message=msg)
+        elif seller_action == "walk":
+            self.done = True
+            components["seller_walked"] = -0.2
+            self._record_deal(DealOutcome.WALK, None, self.current_round)
+            obs = self._make_obs(message=f'Seller: "{msg}"')
+            obs.done = True
+            obs.deal_outcome = DealOutcome.WALK
+            obs.tells = tell_model if self.task.enable_tells else None
+            total = -0.2 + penalty
+            reward = BazaarReward(reward=total, terminal=True, components=components)
+            self.step_rewards.append(total)
+            self.cumulative_reward += total
+            return obs, reward
+        else:  # counter
+            self.offer_history.append({
+                "round": self.current_round,
+                "actor": "seller",
+                "action": "counter",
+                "price": seller_price,
+            })
+            # Partial progress reward
+            initial_gap = self.seller.anchor - 0
+            current_gap = abs(seller_price - price)
+            if len(self.offer_history) >= 4:
+                prev_seller = [h["price"] for h in self.offer_history if h["actor"] == "seller" and h["price"] is not None]
+                prev_buyer = [h["price"] for h in self.offer_history if h["actor"] == "buyer" and h["price"] is not None]
+                if len(prev_seller) >= 2 and len(prev_buyer) >= 2:
+                    # Gap between the previous pair of offers vs. the current pair.
+                    old_gap = abs(prev_seller[-2] - prev_buyer[-2])
+                    gap_reduction = old_gap - current_gap
+                    if gap_reduction > 0 and initial_gap > 0:
+                        progress = 0.05 * (gap_reduction / initial_gap)
+                        components["gap_narrowing"] = round(progress, 4)
+            # NOTE(review): validation penalties from step() are present both as
+            # entries in `components` and in `penalty`, so the two
+            # `sum(components.values()) + penalty` totals below count them twice,
+            # unlike the walk and deal paths.  Left unchanged because it alters
+            # the reward signal — confirm whether this is intended.
+            # Check if max rounds exceeded
+            rounds_per_ep = self.seller.max_rounds
+            if self.current_round >= rounds_per_ep:
+                self.done = True
+                self._record_deal(DealOutcome.EXPIRED, None, self.current_round)
+                obs = self._make_obs(message="Time's up. No deal reached.")
+                obs.done = True
+                obs.deal_outcome = DealOutcome.EXPIRED
+                obs.tells = tell_model if self.task.enable_tells else None
+                components["expired_penalty"] = -0.15
+                total = sum(components.values()) + penalty
+                reward = BazaarReward(reward=total, terminal=True, components=components)
+                self.step_rewards.append(total)
+                self.cumulative_reward += total
+                return obs, reward
+            # Seller delta
+            seller_delta = None
+            seller_offers = [h["price"] for h in self.offer_history if h["actor"] == "seller" and h["price"] is not None]
+            if len(seller_offers) >= 2:
+                seller_delta = round(seller_offers[-2] - seller_offers[-1], 2)
+            total = sum(components.values()) + penalty
+            obs = self._make_obs(message=f'Seller: "{msg}"')
+            obs.opponent_last_offer = seller_price
+            obs.own_last_offer = price
+            obs.seller_last_move_delta = seller_delta
+            obs.rounds_remaining = rounds_per_ep - self.current_round
+            obs.tells = tell_model if self.task.enable_tells else None
+            reward = BazaarReward(reward=total, terminal=False, components=components)
+            self.step_rewards.append(total)
+            self.cumulative_reward += total
+            return obs, reward
+    def _finalize_deal(
+        self, agreed_price: float, components: dict, penalty: float,
+        buyer_accepted: bool, message: str | None = None,
+    ) -> tuple[BazaarObservation, BazaarReward]:
+        """Close the episode with a deal at ``agreed_price`` and compute the terminal reward.
+        Reward = normalised buyer surplus x time discount, plus the career
+        reputation leak and validation penalty, clamped to ``[0, 1]``.
+        """
+        self.done = True
+        assert self.seller is not None
+        budget = self.buyer_budget
+        cost = self.seller.cost
+        surplus = budget - agreed_price
+        max_surplus = budget - cost
+        normalized_surplus = surplus / max_surplus if max_surplus > 0 else 0
+        normalized_surplus = max(0, min(1, normalized_surplus))
+        # Time discount exp(-alpha * exp(beta * t)) shrinks as the round fraction t grows.
+        alpha, beta = 0.3, 2.5
+        t_frac = self.current_round / max(self.seller.max_rounds, 1)
+        time_discount = math.exp(-alpha * math.exp(beta * t_frac))
+        rep_leak = 0.0
+        if self.task.enable_career and len(self.career_history.deals) >= 3:
+            cap_rate = self.career_history.capitulation_rate
+            rep_leak = -0.1 * cap_rate
+            components["reputation_leak"] = rep_leak
+        # A deal above 85% of the seller's anchor is recorded as a capitulation.
+        capitulated = agreed_price > self.seller.anchor * 0.85
+        terminal_reward = normalized_surplus * time_discount
+        components["surplus"] = round(normalized_surplus, 4)
+        components["time_discount"] = round(time_discount, 4)
+        components["terminal_reward"] = round(terminal_reward, 4)
+        total = terminal_reward + rep_leak + penalty
+        total = max(0, min(1, total))
+        self._record_deal(DealOutcome.DEAL, agreed_price, self.current_round, capitulated)
+        self.remaining_bankroll -= agreed_price
+        if message is None:
+            msg = f"Deal! Agreed at {agreed_price:.0f} rupees."
+            if buyer_accepted:
+                msg = f"You accept the seller's offer of {agreed_price:.0f} rupees."
+        else:
+            msg = message
+        obs = self._make_obs(message=msg)
+        obs.done = True
+        obs.deal_outcome = DealOutcome.DEAL
+        reward = BazaarReward(reward=round(total, 4), terminal=True, components=components)
+        self.step_rewards.append(total)
+        self.cumulative_reward += total
+        return obs, reward
+    def _record_deal(self, outcome: DealOutcome, agreed_price: Optional[float], rounds: int, capitulated: bool = False):
+        """Append the episode result and refresh the rolling career statistics."""
+        surplus = 0.0
+        norm_surplus = 0.0
+        if agreed_price is not None:
+            surplus = self.buyer_budget - agreed_price
+            seller_cost_for_deal = (
+                self.seller.cost if self.seller is not None else self.task.seller_cost
+            )
+            max_surplus = self.buyer_budget - seller_cost_for_deal
+            norm_surplus = surplus / max_surplus if max_surplus > 0 else 0
+        record = DealRecord(
+            episode=self.current_episode,
+            outcome=outcome,
+            agreed_price=agreed_price,
+            rounds_taken=rounds,
+            buyer_surplus=surplus,
+            normalized_surplus=norm_surplus,
+            buyer_capitulated=capitulated,
+        )
+        self.career_history.deals.append(record)
+        self.episode_results.append(record)
+        # Career statistics are computed over the 10 most recent episodes.
+        deals = self.career_history.deals
+        k = min(len(deals), 10)
+        recent = deals[-k:]
+        cap_count = sum(1 for d in recent if d.buyer_capitulated)
+        self.career_history.capitulation_rate = cap_count / k
+        completed = [d for d in recent if d.outcome == DealOutcome.DEAL]
+        if completed:
+            self.career_history.avg_normalized_surplus = sum(d.normalized_surplus for d in completed) / len(completed)
+            self.career_history.avg_rounds_to_close = sum(d.rounds_taken for d in completed) / len(completed)
+    def _make_obs(self, message: str = "") -> BazaarObservation:
+        """Build an observation from the current state; callers patch terminal fields."""
+        rounds_per_ep = self.seller.max_rounds if self.seller else self.task.max_steps
+        # Prefer the item chosen by reset() (may come from a real listing);
+        # fall back to the static rotation when no episode item is recorded.
+        item_name = self._current_item
+        if item_name is None:
+            item_name = self._items[(self.current_episode - 1) % len(self._items)] if self.current_episode > 0 else "item"
+        return BazaarObservation(
+            current_round=self.current_round,
+            max_rounds=rounds_per_ep,
+            own_last_offer=self._last_buyer_offer,
+            opponent_last_offer=self.seller.current_offer if self.seller else None,
+            own_private_deadline=self.task.buyer_deadline,
+            own_private_budget=self.buyer_budget,
+            rounds_remaining=max(0, rounds_per_ep - self.current_round),
+            seller_last_move_delta=None,
+            item_name=item_name,
+            seller_asking_price=self.seller.anchor if self.seller else 0,
+            seller_personality=self.task.seller_personality,
+            episode_number=self.current_episode,
+            total_episodes=self.total_episodes,
+            career_history=self.career_history if self.task.enable_career else None,
+            done=self.done,
+            message=message,
+        )
+    def get_state(self) -> EnvironmentState:
+        """Return the current environment bookkeeping as an ``EnvironmentState``."""
+        return EnvironmentState(
+            task_name=self.task.name,
+            episode=self.current_episode,
+            total_episodes=self.total_episodes,
+            current_round=self.current_round,
+            max_rounds=self.seller.max_rounds if self.seller else self.task.max_steps,
+            done=self.done,
+            buyer_budget=self.buyer_budget,
+            # Report the live seller's cost (listing-aware), as _record_deal does.
+            seller_cost=self.seller.cost if self.seller else self.task.seller_cost,
+            seller_anchor=self.seller.anchor if self.seller else 0,
+            seller_personality=self.task.seller_personality,
+            offer_history=self.offer_history,
+            career_history=self.career_history if self.task.enable_career else None,
+            cumulative_reward=self.cumulative_reward,
+            tells_history=self.tells_history,
+        )
+    @property
+    def all_episodes_done(self) -> bool:
+        """True once the final episode of the task has finished."""
+        return self.current_episode >= self.total_episodes and self.done

bazaarbot_env/gym_wrapper.py ADDED Viewed

	@@ -0,0 +1,539 @@

+"""Training-friendly wrapper over BazaarEnvironment.
+Exposes a minimal in-process API (no HTTP) for RL training.  The wrapper:
+- Accepts actions as plain dicts: ``{"action": "offer|accept|walk", "price": float | None}``.
+- Emits observations as plain dicts with every field the LLM prompt needs.
+- Terminates when the environment's current episode ends.  For career tasks
+  (multiple episodes), call `reset_episode()` between episodes and sum
+  terminal rewards — each episode's terminal reward is the GRPO advantage unit.
+- Provides `format_observation()` so the same prompt string is used at train
+  time and inference time.
+- Provides `rollout_episode(policy_fn, ...)` as the GRPO rollout primitive:
+  returns a list of (prompt, action_text, reward) tuples plus the final
+  graded score.
+"""
+from __future__ import annotations
+import copy
+import json
+import random
+import textwrap
+from typing import Any, Callable, Optional
+from .environment import BazaarEnvironment
+from .models import (
+    ActionType,
+    BazaarAction,
+    BazaarObservation,
+    SellerPersonalityType,
+    TaskConfig,
+)
+from .tasks import GRADERS, TASKS
+# System prompt shared by training and evaluation.
+# Keep in sync with inference.py's system prompt so training and eval
+# see the same conditioning.  Few-shot examples are inline so a cold
+# (un-SFT'd) base model has the pattern to copy.
+# The text is runtime data: any edit (even whitespace) changes the prompt
+# the policy is conditioned on.
+DEFAULT_SYSTEM_PROMPT = textwrap.dedent("""\
+    You are a buyer at an Indian bazaar. Your ONLY output is one JSON object.
+    Rules:
+    - Seller's opening price is inflated. Negotiate down.
+    - Never reveal your budget.
+    - Close early at a good price; don't grind for pennies.
+    - The "message" is what you'd actually say to the seller — short Hinglish/English line.
+    Output schema (pick ONE per turn):
+    {"action": "offer", "price": <number>, "message": "<one short line>"}
+    {"action": "accept", "price": null, "message": "<one short line>"}
+    {"action": "walk", "price": null, "message": "<one short line>"}
+    Examples:
+    Seller's ask: 100. Your budget: 200.
+    {"action": "offer", "price": 35, "message": "yaar 35 max, market mein isse kam mil jaata hai"}
+    Seller's ask: 45. Your budget: 200.
+    {"action": "accept", "price": null, "message": "okay deal"}
+    Seller's ask: 180. Your budget: 200.
+    {"action": "walk", "price": null, "message": "sorry boss, itna nahi de sakta"}
+    Output ONE JSON object. No prose outside JSON. No markdown. No thinking.
+""")
+def _obs_to_dict(obs: BazaarObservation) -> dict[str, Any]:
+    """Dump an observation model to a plain dict via ``model_dump(mode="json")``."""
+    as_plain_dict = obs.model_dump(mode="json")
+    return as_plain_dict
+def format_observation(
+    obs: dict[str, Any] | BazaarObservation,
+    history: Optional[list[str]] = None,
+) -> str:
+    """Format an observation as the user-turn prompt.
+    Mirrors the schema used by `inference.py::build_user_prompt` so the
+    policy sees the same text distribution at train and eval time.
+    Args:
+        obs: Observation as a plain dict, or a ``BazaarObservation`` (which is
+            converted with ``_obs_to_dict`` first).
+        history: Prior conversation lines; only the last six are rendered,
+            and the literal string ``"None"`` is shown when there are none.
+    Returns:
+        The prompt text, ending with the ``Your move (JSON only):`` line.
+    """
+    if isinstance(obs, BazaarObservation):
+        obs = _obs_to_dict(obs)
+    history_block = "\n".join((history or [])[-6:]) if history else "None"
+    # Career block is only rendered when the observation carries a non-empty
+    # career_history.
+    career_info = ""
+    if obs.get("career_history"):
+        ch = obs["career_history"]
+        career_info = textwrap.dedent(f"""\
+            --- Career History ---
+            Episodes completed: {len(ch.get('deals', []))}
+            Your capitulation rate: {ch.get('capitulation_rate', 0):.1%}
+            Avg surplus captured: {ch.get('avg_normalized_surplus', 0):.1%}
+            Avg rounds to close: {ch.get('avg_rounds_to_close', 0):.1f}
+        """)
+    # Deadline line is skipped for any falsy deadline (None or 0).
+    deadline_info = ""
+    if obs.get("own_private_deadline"):
+        deadline_info = (
+            f"YOUR HARD DEADLINE: Round {obs['own_private_deadline']} "
+            "(seller doesn't know this!)\n"
+        )
+    # Tells block — only rendered when the observation actually carries
+    # a tells dict. This is the in-loop training signal we want to teach
+    # the buyer to use during SFT/GRPO. At eval time the same block
+    # appears whenever enable_tells/enable_nlp is on, so the buyer sees
+    # the same prompt distribution at train and eval.
+    tells_block = ""
+    tells = obs.get("tells")
+    if tells:
+        urgency = float(tells.get("verbal_urgency", 0.0))
+        deception = float(tells.get("verbal_deception_cue", 0.0))
+        confidence = float(tells.get("verbal_confidence", 0.5))
+        concession = str(tells.get("concession_pattern", "steady"))
+        emotional = float(tells.get("emotional_escalation", 0.0))
+        repeat = int(tells.get("repeat_phrases", 0))
+        condition = str(tells.get("condition_label", "unknown"))
+        tells_block = textwrap.dedent(f"""\
+            --- Seller Tells (noisy signals — read with skepticism) ---
+            urgency: {urgency:.2f}      deception_cue: {deception:.2f}      confidence: {confidence:.2f}
+            concession_pattern: {concession}      emotional_escalation: {emotional:.2f}      repeat_phrases: {repeat}
+            condition: {condition}
+            """)
+    # NOTE(review): the 'N/A' defaults below apply only when a key is absent;
+    # a key that is present with value None renders as "None".
+    # NOTE(review): career_info, tells_block and a multi-line history_block
+    # contain lines that start at column 0, so the outer textwrap.dedent finds
+    # no common margin and leaves the 8-space indentation in place; when the
+    # optional blocks are empty, their placeholder indentation is prepended to
+    # the following line instead.  The rendered whitespace therefore varies
+    # with content — confirm against inference.py before changing, since the
+    # policy is trained on this exact text.
+    return textwrap.dedent(f"""\
+        --- Negotiation State ---
+        Item: {obs.get('item_name', 'item')}
+        Round: {obs.get('current_round', 0)} / {obs.get('max_rounds', 0)}
+        Rounds remaining: {obs.get('rounds_remaining', 0)}
+        Seller's current ask: {obs.get('opponent_last_offer', 'N/A')}
+        Your last offer: {obs.get('own_last_offer', 'N/A')}
+        Your private budget: {obs.get('own_private_budget', 0)}
+        Seller's opening price: {obs.get('seller_asking_price', 0)}
+        {deadline_info}\
+        Seller's last concession: {obs.get('seller_last_move_delta', 'N/A')} rupees
+        Episode: {obs.get('episode_number', 1)} / {obs.get('total_episodes', 1)}
+        {career_info}\
+        {tells_block}\
+        --- Recent History ---
+        {history_block}
+        Seller says: {obs.get('message', '')}
+        Your move (JSON only):
+    """)
+def strip_think_tags(chat_text: str) -> str:
+    """NO-OP: kept for API compatibility.
+    We initially stripped Qwen3.5's auto-injected <think>...</think>
+    blocks from prompts and SFT targets, intending to teach the model
+    to skip reasoning and go straight to JSON.  In practice the first
+    SFT run happened before the strip was wired in, so the trained
+    LoRA actually expects to see <think>\\n\\n</think>\\n\\n preceding
+    its JSON output.
+    Rather than redo SFT, we leave the chat template untouched and let
+    parse_action() discard the leading think block at parse time.
+    """
+    return chat_text
+def parse_action(text: str, fallback_price: float = 30.0) -> dict[str, Any]:
+    """Best-effort JSON parser for LLM action output.
+    Robust to the common failure modes: markdown fences, leading prose,
+    trailing commentary, reasoning-mode <think>...</think> blocks.  Falls
+    back to a conservative offer if unparseable so training never crashes
+    on a bad generation.
+    """
+    import re
+    s = text.strip()
+    # Drop any <think>...</think> blocks before looking for JSON
+    s = re.sub(r"<think>.*?</think>", "", s, flags=re.DOTALL).strip()
+    if "```" in s:
+        parts = s.split("```")
+        if len(parts) >= 2:
+            s = parts[1]
+            if s.lstrip().startswith("json"):
+                s = s.lstrip()[4:]
+    start = s.find("{")
+    end = s.rfind("}") + 1
+    if start >= 0 and end > start:
+        s = s[start:end]
+    try:
+        parsed = json.loads(s)
+        if parsed.get("action") not in ("offer", "accept", "walk"):
+            return {"action": "offer", "price": fallback_price, "message": "", "_parse_error": True}
+        # Ensure message field exists (older models may not return it)
+        parsed.setdefault("message", "")
+        return parsed
+    except Exception:
+        return {"action": "offer", "price": fallback_price, "message": "", "_parse_error": True}
+def steer_bayesian_action(
+    obs: dict[str, Any] | BazaarObservation,
+    proposed_action: dict[str, Any],
+) -> dict[str, Any]:
+    """Apply Bayesian-persuasion-inspired steering + adaptive fallback.
+    The model has incomplete information, so we maintain a compact posterior over
+    seller urgency/flexibility from tells and concession behavior, then gate the
+    raw model action with:
+    - a Nash-style target offer (under estimated seller cost),
+    - an adaptive close threshold near deadline (to reduce unnecessary walks),
+    - anti-premature-walk logic that prefers one more calibrated counter.
+    Args:
+        obs: Current observation (a dict, or a BazaarObservation that is
+            converted with ``_obs_to_dict``).
+        proposed_action: Raw policy action; the keys read are ``action``,
+            ``price`` and ``message``.
+    Returns:
+        A new dict with ``action``, ``price`` and ``message``.  On the
+        steered paths ``action`` is "offer", "accept" or "walk" and
+        ``price`` is None for accept/walk.  When the observation has no
+        positive ask or budget the proposed action is passed through with
+        only a missing offer price / message filled in.
+    """
+    if isinstance(obs, BazaarObservation):
+        obs = _obs_to_dict(obs)
+    # Remember what the policy asked for so _finalize can tell whether the
+    # steerer changed anything.
+    original_action = str(proposed_action.get("action", "offer"))
+    original_price = proposed_action.get("price")
+    original_message = str(proposed_action.get("message") or "")
+    action = {
+        "action": original_action,
+        "price": original_price,
+        "message": original_message,
+    }
+    # Seller's latest ask, falling back to the opening price.
+    ask = float(obs.get("opponent_last_offer") or obs.get("seller_asking_price") or 0.0)
+    budget = float(obs.get("own_private_budget") or 0.0)
+    turn_index_early = int(obs.get("current_round") or 0)
+    max_rounds_early = int(obs.get("max_rounds") or 8)
+    # Degenerate observation: without a positive ask and budget none of the
+    # ratios below are meaningful, so skip steering entirely.
+    if ask <= 0 or budget <= 0:
+        if action["action"] == "offer" and action.get("price") is None:
+            # NOTE(review): the walrus-bound name `fallback` is never read afterwards.
+            action["price"] = round(max(1.0, fallback := budget * 0.3 if budget > 0 else 30.0), 2)
+        if not action.get("message"):
+            from nlp.templates import render
+            action["message"] = render(
+                action["action"], action.get("price"),
+                ask=ask, turn_index=turn_index_early, max_turns=max_rounds_early,
+            )
+        return action
+    rounds_remaining = int(obs.get("rounds_remaining") or 0)
+    max_rounds = max(1, int(obs.get("max_rounds") or rounds_remaining or 1))
+    current_round = int(obs.get("current_round") or (max_rounds - rounds_remaining))
+    # Fraction of the negotiation already used, clamped to [0, 1].
+    late_pressure = max(0.0, min(1.0, current_round / max_rounds))
+    # Personality-conditioned priors; unknown personalities fall back to 0.50.
+    personality = str(obs.get("seller_personality") or "default")
+    prior_urgency = {
+        "default": 0.50,
+        "deceptive": 0.45,
+        "impatient": 0.68,
+        "collaborative": 0.40,
+    }.get(personality, 0.50)
+    prior_flex = {
+        "default": 0.50,
+        "deceptive": 0.30,
+        "impatient": 0.65,
+        "collaborative": 0.72,
+    }.get(personality, 0.50)
+    # Tells are optional; missing or falsy values take the defaults below.
+    tells = obs.get("tells") or {}
+    verbal_urgency = float(tells.get("verbal_urgency") or 0.0)
+    fidget = float(tells.get("fidget_level") or 0.0)
+    emotional = float(tells.get("emotional_escalation") or 0.0)
+    deception = float(tells.get("verbal_deception_cue") or 0.0)
+    confidence = float(tells.get("verbal_confidence") or 0.5)
+    speed = str(tells.get("offer_speed") or "normal")
+    concession_pattern = str(tells.get("concession_pattern") or "steady")
+    speed_urgency = {"instant": 0.15, "normal": 0.05, "deliberate": -0.05}.get(speed, 0.0)
+    pattern_urgency = {
+        "front_loaded": 0.15,
+        "erratic": 0.08,
+        "stalling": -0.10,
+        "steady": 0.00,
+    }.get(concession_pattern, 0.0)
+    # Urgency evidence: weighted sum of the tells plus categorical
+    # adjustments, clamped to [0, 1].
+    signal_urgency = max(
+        0.0,
+        min(
+            1.0,
+            0.35 * verbal_urgency
+            + 0.25 * fidget
+            + 0.20 * emotional
+            + 0.10 * deception
+            + 0.10 * (1.0 - confidence)
+            + speed_urgency
+            + pattern_urgency,
+        ),
+    )
+    # Flexibility evidence: the seller's last concession relative to the
+    # current ask, plus tell-based terms, clamped to [0, 1].
+    seller_delta = float(obs.get("seller_last_move_delta") or 0.0)
+    concession_ratio = max(0.0, min(1.0, seller_delta / max(ask, 1.0)))
+    pattern_flex = {
+        "front_loaded": 0.22,
+        "steady": 0.08,
+        "erratic": 0.03,
+        "stalling": -0.18,
+    }.get(concession_pattern, 0.0)
+    signal_flex = max(
+        0.0,
+        min(
+            1.0,
+            0.45 * concession_ratio
+            + 0.20 * (1.0 - confidence)
+            + 0.20 * verbal_urgency
+            + 0.15 * (1.0 - deception)
+            + pattern_flex,
+        ),
+    )
+    # "Posterior" here is a fixed 55/45 blend of prior and signal.
+    posterior_urgency = max(0.0, min(1.0, 0.55 * prior_urgency + 0.45 * signal_urgency))
+    posterior_flex = max(0.0, min(1.0, 0.55 * prior_flex + 0.45 * signal_flex))
+    # Estimated seller cost as a fraction of the ask, capped at 90% of it.
+    estimated_cost = ask * (0.58 - 0.18 * posterior_urgency + 0.08 * (1.0 - posterior_flex))
+    estimated_cost = max(1.0, min(estimated_cost, ask * 0.90))
+    # Nash bargaining point under estimated seller cost and inferred buyer power.
+    buyer_power = 0.35 + 0.40 * posterior_urgency + 0.20 * posterior_flex - 0.30 * late_pressure
+    buyer_power = max(0.20, min(0.85, buyer_power))
+    nash_target = (1.0 - buyer_power) * budget + buyer_power * estimated_cost
+    nash_target = max(1.0, min(nash_target, min(budget * 0.95, ask * 1.02)))
+    # Adaptive fallback: grow acceptance threshold late so we close more often.
+    close_slack = 0.28 + 0.45 * late_pressure + 0.12 * (1.0 - posterior_urgency)
+    accept_threshold = nash_target + (budget - nash_target) * close_slack
+    accept_threshold = min(accept_threshold, budget * 0.95)
+    # Band that any steered offer is clipped into.
+    floor_offer = max(1.0, min(nash_target * 0.85, ask * 0.65, budget * 0.85))
+    ceiling_offer = min(accept_threshold, ask * (0.90 + 0.08 * late_pressure))
+    if rounds_remaining <= 2:
+        # Endgame: raise the floor to 87% of the ask and lift the ceiling
+        # with it if needed.
+        floor_offer = max(floor_offer, ask * 0.87)
+        ceiling_offer = max(ceiling_offer, floor_offer)
+    if ceiling_offer < floor_offer:
+        floor_offer = ceiling_offer
+    own_last_offer = obs.get("own_last_offer")
+    own_last_offer = float(own_last_offer) if own_last_offer is not None else None
+    def _finalize(out: dict) -> dict:
+        """Re-render the message via template if the steerer changed the action,
+        moved the price by more than 10%, or the policy supplied no message."""
+        new_action = out["action"]
+        new_price = out.get("price")
+        action_changed = new_action != original_action
+        price_changed = (
+            original_price is not None
+            and new_price is not None
+            and abs(float(new_price) - float(original_price)) / max(float(original_price), 1.0) > 0.10
+        )
+        if action_changed or price_changed or not original_message:
+            from nlp.templates import render
+            out["message"] = render(
+                new_action, new_price,
+                ask=ask, turn_index=current_round, max_turns=max_rounds,
+            )
+        else:
+            out["message"] = original_message
+        return out
+    # Accept path: veto an accept while the ask is still above the threshold
+    # and more than one round remains; counter at the clipped Nash target.
+    if action["action"] == "accept":
+        if ask > accept_threshold and rounds_remaining > 1:
+            action["action"] = "offer"
+            action["price"] = round(max(floor_offer, min(ceiling_offer, nash_target)), 2)
+        else:
+            action["price"] = None
+        return _finalize(action)
+    if action["action"] == "walk":
+        # A walk is honoured only at the very end with the ask above 98% of budget.
+        if rounds_remaining <= 1 and ask > budget * 0.98:
+            action["price"] = None
+            return _finalize(action)
+        # Anti-premature walk: take one calibrated close attempt first.
+        if ask <= accept_threshold and rounds_remaining <= 2:
+            action["action"] = "accept"
+            action["price"] = None
+            return _finalize(action)
+        # Otherwise convert the walk into a probing offer one step above
+        # our last offer (or the floor), clipped to the band.
+        action["action"] = "offer"
+        probe_start = own_last_offer if own_last_offer is not None else floor_offer
+        probe_price = max(floor_offer, min(ceiling_offer, probe_start + max(1.0, ask * 0.06)))
+        action["price"] = round(probe_price, 2)
+        return _finalize(action)
+    # Offer path: clip to Bayesian/Nash band and auto-close late if ask is acceptable.
+    # Any action other than "accept"/"walk" reaches this point and is
+    # treated as an offer.
+    if rounds_remaining <= 1 and ask <= accept_threshold:
+        return _finalize({"action": "accept", "price": None, "message": ""})
+    proposed_price = action.get("price")
+    if proposed_price is None:
+        # No price from the policy: default to the middle of the band.
+        proposed_price = (floor_offer + ceiling_offer) / 2
+    proposed_price = float(proposed_price)
+    steered_price = max(floor_offer, min(ceiling_offer, proposed_price))
+    # Buyer offers must move monotonically toward seller. If the model proposes
+    # a price below our previous offer (which the seller has already implicitly
+    # rejected by countering), bump up to at least last + a small concession
+    # toward the seller's ask. Without this the buyer can slide *backward*
+    # mid-negotiation, which sellers correctly read as either incoherent or
+    # bad-faith.
+    if own_last_offer is not None and steered_price < own_last_offer:
+        gap = max(0.0, ask - own_last_offer)
+        bump = max(1.0, gap * 0.15)
+        # Hold at last offer if ceiling has fallen below it; never retreat.
+        # Clamping to min(ceiling, ...) here would let ceiling drag us backward
+        # — the exact bug the guard is meant to prevent.
+        target = max(own_last_offer, min(ceiling_offer, own_last_offer + bump))
+        steered_price = target
+    action["price"] = round(steered_price, 2)
+    action["action"] = "offer"
+    return _finalize(action)
+class BazaarGymEnv:
+    """Minimal gym-like wrapper over BazaarEnvironment for in-process training."""
+    def __init__(
+        self,
+        task_name: str = "single_deal",
+        seed: Optional[int] = None,
+        personality_override: Optional[str] = None,
+    ):
+        if task_name not in TASKS:
+            raise ValueError(
+                f"Unknown task: {task_name}. Available: {list(TASKS.keys())}"
+            )
+        self.task_name = task_name
+        self.seed = seed
+        self._base_task = copy.deepcopy(TASKS[task_name])
+        if personality_override:
+            self._base_task.seller_personality = SellerPersonalityType(
+                personality_override
+            )
+        self._env: Optional[BazaarEnvironment] = None
+        self.done: bool = True
+    def reset(self) -> tuple[dict[str, Any], dict[str, Any]]:
+        self._env = BazaarEnvironment(copy.deepcopy(self._base_task), seed=self.seed)
+        obs = self._env.reset()
+        self.done = False
+        return _obs_to_dict(obs), {}
+    def step(
+        self, action: dict[str, Any]
+    ) -> tuple[dict[str, Any], float, bool, dict[str, Any]]:
+        if self._env is None:
+            raise RuntimeError("Call reset() before step().")
+        act = BazaarAction(
+            action=ActionType(action.get("action", "offer")),
+            price=action.get("price"),
+        )
+        obs, reward_obj = self._env.step(act)
+        # Episode-level done.  For career tasks, we signal done at episode end
+        # so the outer loop can compute per-episode rewards; the caller resets.
+        self.done = obs.done
+        info = {
+            "components": reward_obj.components,
+            "episode": self._env.current_episode,
+            "all_episodes_done": self._env.all_episodes_done,
+        }
+        return _obs_to_dict(obs), float(reward_obj.reward), self.done, info
+    def score(self) -> float:
+        """Final graded score across all completed episodes."""
+        if self._env is None:
+            return 0.0
+        grader = GRADERS.get(self._env.task.name)
+        if grader is None:
+            return 0.0
+        return float(grader(self._env.episode_results, self._env.task))
+    @property
+    def env(self) -> BazaarEnvironment:
+        if self._env is None:
+            raise RuntimeError("Environment not initialized; call reset().")
+        return self._env
+# rollout_episode() calls this with the text built by format_observation()
+# and hands the returned string to parse_action().
+PolicyFn = Callable[[str], str]
+"""A policy takes a user-turn prompt and returns raw text (LLM completion)."""
+def rollout_episode(
+    policy_fn: PolicyFn,
+    task_name: str = "single_deal",
+    seed: Optional[int] = None,
+    personality_override: Optional[str] = None,
+    max_env_steps: int = 200,
+    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
+) -> dict[str, Any]:
+    """Run one full rollout with an LLM policy; return trajectory + score.
+    Returns a dict with keys:
+        steps: list of {prompt, completion, action, reward, done} per turn
+        total_reward: sum of per-step rewards
+        score: grader-assigned terminal score (this is the GRPO reward signal)
+        num_steps, success
+    """
+    env = BazaarGymEnv(
+        task_name=task_name, seed=seed, personality_override=personality_override
+    )
+    obs, _ = env.reset()
+    history: list[str] = []
+    steps: list[dict[str, Any]] = []
+    total_reward = 0.0
+    for _ in range(max_env_steps):
+        prompt = format_observation(obs, history=history)
+        completion = policy_fn(prompt)
+        action = parse_action(completion, fallback_price=obs.get("own_private_budget", 100) * 0.3)
+        obs, reward, done, info = env.step(action)
+        total_reward += reward
+        history.append(
+            f"Round {obs.get('current_round', '?')}: You "
+            f"{'offered ' + str(action.get('price')) if action.get('action') == 'offer' else action.get('action')}"
+            f" -> Seller: {obs.get('message', '')}"
+        )
+        steps.append({
+            "prompt": prompt,
+            "completion": completion,
+            "action": action,
+            "reward": reward,
+            "done": done,
+            "parse_error": bool(action.get("_parse_error")),
+        })
+        if done:
+            if info.get("all_episodes_done"):
+                break
+            # Career mode: inner env auto-resets via the wrapper's outer loop.
+            # We let the test harness (or trainer) handle multi-episode by
+            # calling rollout_episode once per episode if desired.
+            break
+    return {
+        "steps": steps,
+        "total_reward": total_reward,
+        "score": env.score(),
+        "num_steps": len(steps),
+        "task": task_name,
+    }

bazaarbot_env/listings.py ADDED Viewed

	@@ -0,0 +1,118 @@

+"""Real-world listing sampler for varied negotiation scenarios.
+Loads the Amazon Sales CSV (committed at ``data/amazon.csv``) and exposes
+``sample_listing(rng)``.  Each listing provides ground-truth fair-market
+anchors used to set buyer budget and seller cost per episode, so the model
+sees a different item and price range every rollout instead of the 10
+hardcoded bazaar items.
+Price mapping (rupees):
+    actual_price       -> seller opening anchor (MRP)
+    discounted_price   -> realistic market price
+    seller_cost        = discounted_price * 0.7   (below-market floor)
+    buyer_budget       = actual_price             (can afford MRP but wants lower)
+"""
+from __future__ import annotations
+import csv
+import os
+import random
+import re
+from functools import lru_cache
+from pathlib import Path
+from typing import Optional
+# Path resolution: try repo root, then package-local data dir.
+_CANDIDATES = [
+    Path(__file__).resolve().parent.parent / "data" / "amazon.csv",
+    Path(__file__).resolve().parent / "data" / "amazon.csv",
+    Path(os.getenv("BAZAARBOT_LISTINGS_CSV", "")),
+]
+def _find_csv() -> Optional[Path]:
+    for p in _CANDIDATES:
+        if p and p.exists():
+            return p
+    return None
+def _parse_rupees(s: str) -> Optional[float]:
+    """Parse '₹1,099' -> 1099.0.  None on failure."""
+    if not s:
+        return None
+    cleaned = re.sub(r"[^\d.]", "", s)
+    try:
+        return float(cleaned) if cleaned else None
+    except ValueError:
+        return None
+@lru_cache(maxsize=1)
+def _load_listings() -> list[dict]:
+    csv_path = _find_csv()
+    if csv_path is None:
+        return []
+    listings: list[dict] = []
+    with open(csv_path, encoding="utf-8") as f:
+        for row in csv.DictReader(f):
+            actual = _parse_rupees(row.get("actual_price", ""))
+            discounted = _parse_rupees(row.get("discounted_price", ""))
+            name = (row.get("product_name") or "").strip()
+            if not name or actual is None or discounted is None:
+                continue
+            if actual <= 0 or discounted <= 0 or discounted >= actual:
+                # require a real discount so there's negotiation room
+                continue
+            # Trim absurdly long product titles; keep the informative head.
+            short_name = name.split(",")[0].strip()
+            if len(short_name) > 80:
+                short_name = short_name[:77] + "..."
+            listings.append({
+                "name": short_name,
+                "full_name": name,
+                "category": (row.get("category") or "").split("|")[0].strip(),
+                "actual_price": actual,
+                "discounted_price": discounted,
+            })
+    return listings
+def num_listings() -> int:
+    return len(_load_listings())
+def sample_listing(rng: Optional[random.Random] = None) -> Optional[dict]:
+    """Return a dict with listing + derived bazaar params, or None if CSV absent.
+    Return shape::
+        {
+            "name": str,
+            "category": str,
+            "actual_price": float,
+            "discounted_price": float,
+            "seller_cost": float,      # below-market floor
+            "buyer_budget": float,     # MRP ceiling
+            "seller_anchor": float,    # opening ask
+            "fair_value": float,       # street price (hidden from buyer)
+        }
+    """
+    listings = _load_listings()
+    if not listings:
+        return None
+    rng = rng or random
+    row = rng.choice(listings)
+    return {
+        "name": row["name"],
+        "category": row["category"],
+        "actual_price": row["actual_price"],
+        "discounted_price": row["discounted_price"],
+        "seller_cost": round(row["discounted_price"] * 0.7, 2),
+        "buyer_budget": round(row["actual_price"], 2),
+        "seller_anchor": round(row["actual_price"], 2),
+        "fair_value": round(row["discounted_price"], 2),
+    }

bazaarbot_env/llm_seller.py ADDED Viewed

	@@ -0,0 +1,453 @@

+"""LLM-backed seller for BazaarBATNA.
+Implements docs/SELLER_HANDOFF.md interface:
+    LLMSeller(listing, role_brief, model).open() -> str
+    LLMSeller(...).respond(history, buyer_message, buyer_offer) -> SellerReply
+Designed to run on a single 16GB GPU (Kaggle T4) at 4-bit. The ``LLMSeller``
+constructor defaults to ``google/gemma-4-E4B``; the sizing note here was
+written for Gemma-3-4B-Instruct (~3GB at 4-bit), which fits with headroom.
+Larger models (e.g. gemma-2-9b-it) work too on T4 at 4-bit.
+Hard rules enforced in code (not just prompt):
+    1. Never accept below reservation
+    2. Never leak reservation price in messages
+    3. Counter offers always >= reservation
+    4. Counter must improve on previous counter (monotone toward buyer)
+"""
+from __future__ import annotations
+import json
+import re
+from dataclasses import dataclass
+from typing import Any, Literal, TypedDict
+class HistoryTurn(TypedDict):
+    """One turn of negotiation history.
+    Presumably the element type of the ``history`` argument to
+    ``LLMSeller.respond`` — confirm against that method.
+    """
+    # Who spoke on this turn.
+    role: Literal["seller", "buyer"]
+    # Text of the turn.
+    message: str
+    # Price attached to the turn, if any.
+    price: float | None
+class SellerReply(TypedDict):
+    """Structured seller response: message text, chosen action and price."""
+    # Text shown to the buyer.
+    message: str
+    # One of the three seller moves.
+    action: Literal["counter", "accept", "walk"]
+    # Price that goes with the action; the type allows None.
+    price: float | None
+# ── Persona prompt fragments ────────────────────────────────────────
+# Maps persona name -> one-line behavioural guidance.  LLMSeller falls back
+# to "default" for any persona that is not a key here and interpolates the
+# matching text into its system prompt.
+PERSONA_GUIDANCE = {
+    "default": "Stay balanced. Concede in moderate steps. Justify price with item details.",
+    "firm": "Concede slowly. Defend your asking price with specific details from the listing.",
+    "flexible": "Be warm and willing to deal, but still profit-seeking — don't capitulate.",
+    "deceptive": "Use bluffs about other interested buyers and time pressure to push the price up.",
+}
+# ── Lazy model bundle cache ─────────────────────────────────────────
+@dataclass
+class _Bundle:
+    """Tokenizer/model pair as loaded by ``_load_bundle``."""
+    tokenizer: Any
+    model: Any
+# Process-wide cache of loaded bundles, keyed by model name; filled by
+# _load_bundle.
+_MODEL_CACHE: dict[str, _Bundle] = {}
+def _load_bundle(model_name: str) -> _Bundle:
+    """Load and cache the tokenizer/model pair for ``model_name``.
+    With CUDA available the precision follows the SELLER_DTYPE environment
+    variable: "4bit" (default) loads NF4-quantized weights with bfloat16
+    compute, "fp16" loads float16, and any other value loads bfloat16.
+    Without CUDA the model is loaded in float32.  Repeated calls with the
+    same name return the cached bundle.
+    Lazy import keeps the file usable on machines without torch installed
+    (e.g. lint, doc generation).
+    """
+    cached = _MODEL_CACHE.get(model_name)
+    if cached is not None:
+        return cached
+    import torch
+    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+    torch.backends.cuda.matmul.allow_tf32 = True
+    # Tokenizer loading on some environments (notably Kaggle) can hit a
+    # SentencePiece error: `TypeError: not a string` when a Path-like is passed
+    # into `SentencePieceProcessor.Load`. If that happens, fall back to
+    # constructing GemmaTokenizer directly with a string path to tokenizer.model.
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True)
+    except TypeError as e:
+        msg = str(e).lower()
+        # Only the specific SentencePiece failure is handled; any other
+        # TypeError propagates unchanged.
+        if "not a string" not in msg:
+            raise
+        try:
+            from huggingface_hub import hf_hub_download
+            from transformers import GemmaTokenizer
+            tok_path = hf_hub_download(
+                repo_id=model_name,
+                filename="tokenizer.model",
+                token=True,
+            )
+            tokenizer = GemmaTokenizer(vocab_file=str(tok_path))
+        except Exception:
+            # If fallback fails, re-raise the original, more informative error.
+            raise e
+    # Reuse EOS as the pad token when the tokenizer defines none.
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+    # SELLER_DTYPE env-var: "4bit" (default), "bf16", "fp16"
+    import os as _os
+    dtype_choice = _os.environ.get("SELLER_DTYPE", "4bit").lower()
+    kwargs: dict[str, Any] = {"device_map": "auto", "trust_remote_code": True}
+    if torch.cuda.is_available():
+        if dtype_choice == "4bit":
+            kwargs["torch_dtype"] = torch.bfloat16
+            kwargs["quantization_config"] = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_compute_dtype=torch.bfloat16,
+                bnb_4bit_use_double_quant=True,
+            )
+        elif dtype_choice == "fp16":
+            kwargs["torch_dtype"] = torch.float16
+        else:
+            # "bf16" and any unrecognised value
+            kwargs["torch_dtype"] = torch.bfloat16
+    else:
+        # No CUDA: SELLER_DTYPE is ignored and float32 is used.
+        kwargs["torch_dtype"] = torch.float32
+    model = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
+    model.config.use_cache = True
+    bundle = _Bundle(tokenizer=tokenizer, model=model)
+    _MODEL_CACHE[model_name] = bundle
+    return bundle
+# ── Helpers ─────────────────────────────────────────────────────────
+def _to_float(value: Any) -> float | None:
+    if value is None:
+        return None
+    if isinstance(value, (int, float)):
+        return float(value)
+    if isinstance(value, str):
+        cleaned = value.replace("$", "").replace(",", "").strip()
+        try:
+            return float(cleaned)
+        except ValueError:
+            return None
+    return None
+def _extract_json(text: str) -> dict[str, Any] | None:
+    """Best-effort JSON parser for LLM output."""
+    if not text:
+        return None
+    cleaned = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).replace("```", "").strip()
+    match = re.search(r"\{.*\}", cleaned, flags=re.DOTALL)
+    if not match:
+        return None
+    raw = match.group(0)
+    try:
+        return json.loads(raw)
+    except json.JSONDecodeError:
+        # Common failure modes: single quotes, trailing commas
+        s = raw.replace("'", '"')
+        s = re.sub(r",\s*\}", "}", s)
+        s = re.sub(r",\s*\]", "]", s)
+        try:
+            return json.loads(s)
+        except json.JSONDecodeError:
+            return None
+def _chat(
+    model_name: str,
+    system: str,
+    user: str,
+    max_new_tokens: int = 200,
+    temperature: float = 0.3,
+) -> str:
+    """Run one system+user turn through the cached model; return the completion.
+    The prompt comes from the tokenizer's chat template when one is set,
+    otherwise from a manual Gemma-style format with the system text folded
+    into the user turn.  The encoded prompt is truncated to 2048 tokens.
+    ``temperature > 0`` samples with ``top_p=0.9``; otherwise decoding is
+    greedy.  Only the newly generated tokens are decoded, with special
+    tokens skipped and surrounding whitespace stripped.
+    """
+    import torch
+    bundle = _load_bundle(model_name)
+    tok = bundle.tokenizer
+    messages = [
+        {"role": "system", "content": system},
+        {"role": "user", "content": user},
+    ]
+    # Some models (e.g. gemma-4-E4B) ship without tokenizer.chat_template set.
+    # Try apply_chat_template; if it fails, use a Gemma-style manual prompt.
+    has_template = (
+        hasattr(tok, "apply_chat_template")
+        and getattr(tok, "chat_template", None) is not None
+    )
+    prompt = None
+    if has_template:
+        try:
+            prompt = tok.apply_chat_template(
+                messages, tokenize=False, add_generation_prompt=True
+            )
+        except Exception:
+            # Any template failure falls through to the manual prompt below.
+            prompt = None
+    if prompt is None:
+        # Gemma format: <start_of_turn>role\ncontent<end_of_turn>
+        # System gets folded into user (Gemma doesn't have a system role).
+        combined_user = f"{system}\n\n{user}" if system else user
+        prompt = (
+            f"<start_of_turn>user\n{combined_user}<end_of_turn>\n"
+            f"<start_of_turn>model\n"
+        )
+    # Resolve stop tokens. Gemma uses <end_of_turn>; Llama-3.1 uses <|eot_id|>.
+    # Default eos alone often doesn't fire on chat-formatted prompts → model
+    # runs to max_new_tokens (30s+ on A10G at 4-bit) instead of stopping after
+    # the assistant message.
+    eos_ids: list[int] = []
+    if isinstance(tok.eos_token_id, int):
+        eos_ids.append(tok.eos_token_id)
+    for stop_tok in ("<end_of_turn>", "<|eot_id|>"):
+        tid = tok.convert_tokens_to_ids(stop_tok)
+        # Skip stop tokens this vocabulary does not know (they map to the
+        # unknown-token id) and avoid duplicates.
+        if isinstance(tid, int) and tid != tok.unk_token_id and tid not in eos_ids:
+            eos_ids.append(tid)
+    inputs = tok(prompt, return_tensors="pt", truncation=True, max_length=2048).to(bundle.model.device)
+    gen_kwargs: dict[str, Any] = {
+        "max_new_tokens": max_new_tokens,
+        "pad_token_id": tok.eos_token_id,
+        "eos_token_id": eos_ids if eos_ids else tok.eos_token_id,
+    }
+    if temperature > 0:
+        gen_kwargs.update({"do_sample": True, "temperature": temperature, "top_p": 0.9})
+    else:
+        gen_kwargs["do_sample"] = False
+    with torch.inference_mode():
+        out = bundle.model.generate(**inputs, **gen_kwargs)
+    # Slice off the prompt so only the completion is decoded.
+    new_tokens = out[0][inputs["input_ids"].shape[1]:]
+    return tok.decode(new_tokens, skip_special_tokens=True).strip()
+def generate_structured_reply(
+    model: str,
+    system: str,
+    user: str,
+    max_new_tokens: int = 200,
+    temperature: float = 0.3,
+) -> dict[str, Any] | None:
+    """Public helper used by eval/seller_quality.py for the persona judge."""
+    return _extract_json(_chat(model, system, user, max_new_tokens, temperature))
+# ── LLMSeller ───────────────────────────────────────────────────────
+class LLMSeller:
+    """Gemma-backed seller with hard reservation/leak guards."""
+    def __init__(
+        self,
+        listing: dict,
+        role_brief: dict,
+        model: str = "google/gemma-4-E4B",
+    ):
+        self.listing = listing
+        self.role_brief = role_brief
+        self.model = model
+        self.title = str(listing.get("title") or "this item")
+        self.category = str(listing.get("category") or "item")
+        desc = listing.get("description") or ""
+        if isinstance(desc, list):
+            desc = " ".join(str(x) for x in desc)
+        self.description = str(desc)[:1400]  # cap for prompt budget
+        asking = _to_float(role_brief.get("asking_price"))
+        if asking is None:
+            asking = _to_float(listing.get("price")) or 100.0
+        self.asking = float(asking)
+        reservation = _to_float(role_brief.get("reservation_price"))
+        if reservation is None:
+            reservation = self.asking * 0.78
+        self.reservation = max(1.0, min(float(reservation), self.asking * 0.97))
+        persona = str(role_brief.get("persona", "default")).lower().strip()
+        self.persona = persona if persona in PERSONA_GUIDANCE else "default"
+        self._last_counter = self.asking
+    # ── Prompt construction ─────────────────────────────────────
+    def _system_prompt(self) -> str:
+        return (
+            "You are a Craigslist seller negotiating with a buyer. Your goal is to CLOSE A DEAL "
+            "above your minimum, not to walk away. \n\n"
+            "RULES:\n"
+            "- Stay grounded in the listing — only reference details from it.\n"
+            "- Never reveal your minimum/reservation price.\n"
+            "- Never accept below your minimum.\n"
+            "- Counter low offers — do NOT walk on the first lowball.\n"
+            "- Walk only as a last resort, after multiple bad-faith offers.\n"
+            "- Concede in steps; you want this sale.\n"
+            "- Keep replies short and human (1-3 sentences).\n\n"
+            f"Persona: {self.persona}. {PERSONA_GUIDANCE[self.persona]}\n\n"
+            f"LISTING TITLE: {self.title}\n"
+            f"CATEGORY: {self.category}\n"
+            f"DESCRIPTION: {self.description}\n"
+            f"ASKING PRICE: {self.asking:.2f}\n"
+        )
+    # ── Sanitization & guards ───────────────────────────────────
+    def _sanitize(self, text: str) -> str:
+        text = (text or "").strip()
+        if not text:
+            text = "I'm open to serious offers, but not at that price."
+        # Redact any leak of the reservation price
+        for token in {f"{self.reservation:.2f}", f"{self.reservation:.1f}", f"{self.reservation:.0f}"}:
+            text = re.sub(rf"\b{re.escape(token)}\b", "my minimum", text)
+        if len(text) > 320:
+            text = text[:317].rstrip() + "..."
+        return text
+    def _next_counter(self, buyer_offer: float | None) -> float:
+        """Concede toward buyer but never below reservation."""
+        if buyer_offer is None:
+            return round(max(self.reservation, self._last_counter), 2)
+        gap = max(0.0, self._last_counter - buyer_offer)
+        step = max(self.asking * 0.03, gap * 0.35)
+        candidate = max(self._last_counter - step, self.reservation)
+        return round(candidate, 2)
+    def _heuristic_reply(self, buyer_offer: float | None) -> SellerReply:
+        if buyer_offer is None:
+            return {"message": "What's your offer?", "action": "counter", "price": round(self._last_counter, 2)}
+        if buyer_offer >= self.asking:
+            return {"message": "Sounds good. Deal.", "action": "accept", "price": round(buyer_offer, 2)}
+        if buyer_offer >= self.reservation:
+            return {
+                "message": f"You can have it at {buyer_offer:.0f}.",
+                "action": "accept",
+                "price": round(buyer_offer, 2),
+            }
+        # Don't walk on first lowballs — counter and let the buyer come up.
+        # Only walk if the offer is genuinely insulting (< 50% of asking).
+        if buyer_offer < self.asking * 0.5:
+            counter = max(self.reservation, self._last_counter * 0.95)
+            self._last_counter = counter
+            return {
+                "message": f"That's far too low. I can do {counter:.0f}, take it or leave it.",
+                "action": "counter",
+                "price": round(counter, 2),
+            }
+        counter = self._next_counter(buyer_offer)
+        self._last_counter = counter
+        return {"message": f"I can do {counter:.0f}.", "action": "counter", "price": counter}
+    # ── Public API ──────────────────────────────────────────────
+    def open(self) -> str:
+        parsed = generate_structured_reply(
+            self.model,
+            self._system_prompt(),
+            'Output JSON only: {"message": "<one short opening line>"}.',
+            max_new_tokens=120,
+            temperature=0.4,
+        )
+        if parsed and isinstance(parsed.get("message"), str):
+            return self._sanitize(parsed["message"])
+        return self._sanitize(f"Selling {self.title} at {self.asking:.0f}.")
+    def respond(
+        self,
+        history: list[HistoryTurn],
+        buyer_message: str,
+        buyer_offer: float | None,
+    ) -> SellerReply:
+        fallback = self._heuristic_reply(buyer_offer)
+        # Compact recent history for the prompt
+        lines = []
+        for turn in history[-8:]:
+            who = turn.get("role", "buyer")
+            msg = str(turn.get("message", "")).strip()
+            px = turn.get("price")
+            px_part = "" if px is None else f" [${float(px):.2f}]"
+            lines.append(f"{who}: {msg}{px_part}")
+        history_block = "\n".join(lines) if lines else "(empty)"
+        user_prompt = (
+            'Return JSON only: {"message": str, "action": "counter|accept|walk", "price": number|null}.\n\n'
+            f"Conversation:\n{history_block}\n\n"
+            f"Buyer just said: {buyer_message}\n"
+            f"Buyer offer: {buyer_offer}\n\n"
+            "Rules: never accept below reservation; never reveal reservation; stay grounded in the listing."
+        )
+        parsed = generate_structured_reply(
+            self.model,
+            self._system_prompt(),
+            user_prompt,
+            max_new_tokens=120,
+            temperature=0.35,
+        )
+        if not parsed:
+            out: dict[str, Any] = dict(fallback)
+        else:
+            action = str(parsed.get("action", fallback["action"])).lower().strip()
+            if action not in {"counter", "accept", "walk"}:
+                action = fallback["action"]
+            message = self._sanitize(str(parsed.get("message", fallback["message"])))
+            price = _to_float(parsed.get("price"))
+            out = {"message": message, "action": action, "price": price}
+        # ── Hard guards ──────────────────────────────────────────
+        if out["action"] == "accept":
+            accept_at = buyer_offer if buyer_offer is not None else _to_float(out.get("price"))
+            if accept_at is None or float(accept_at) < self.reservation:
+                # Block illegal accept; rewrite as a counter
+                out["action"] = "counter"
+                out["price"] = self._next_counter(buyer_offer)
+                out["message"] = self._sanitize(f"Can't go that low. I can do {out['price']:.0f}.")
+            else:
+                out["price"] = round(float(accept_at), 2)
+        elif out["action"] == "counter":
+            price = _to_float(out.get("price"))
+            if price is None:
+                price = self._next_counter(buyer_offer)
+            price = max(float(price), self.reservation)
+            # If buyer's offer is at/above our reservation, just accept it —
+            # the LLM doesn't know the reservation so it'll keep countering
+            # forever even when the deal is already good for us.
+            if buyer_offer is not None and float(buyer_offer) >= self.reservation:
+                out["action"] = "accept"
+                out["price"] = round(float(buyer_offer), 2)
+                out["message"] = self._sanitize("Alright, that works. Deal.")
+            elif buyer_offer is not None and price <= float(buyer_offer):
+                # Counter that doesn't improve on buyer offer makes no sense;
+                # bump it up by a small step
+                price = max(self.reservation, float(buyer_offer) + max(1.0, self.asking * 0.02))
+                out["price"] = round(float(price), 2)
+                self._last_counter = float(out["price"])
+            else:
+                out["price"] = round(float(price), 2)
+                self._last_counter = float(out["price"])
+        else:  # walk
+            # Anti-premature-walk: if early in negotiation (< 3 seller turns done),
+            # override to a counter — buyers often need a few rounds to come up.
+            seller_turns_so_far = sum(1 for t in history if t.get("role") == "seller")
+            buyer_above_half_asking = (
+                buyer_offer is not None and float(buyer_offer) >= self.asking * 0.5
+            )
+            if seller_turns_so_far < 3 and buyer_above_half_asking:
+                counter = self._next_counter(buyer_offer)
+                self._last_counter = counter
+                out = {
+                    "action": "counter",
+                    "price": round(counter, 2),
+                    "message": self._sanitize(
+                        f"That's too low for what this is. I can do {counter:.0f}."
+                    ),
+                }
+            else:
+                out["price"] = None
+        out["message"] = self._sanitize(str(out["message"]))
+        return out  # type: ignore[return-value]

bazaarbot_env/models.py ADDED Viewed

	@@ -0,0 +1,246 @@

+"""Pydantic models for BazaarBot negotiation environment."""
+from __future__ import annotations
+import enum
+from typing import Optional
+from pydantic import BaseModel, Field
+class ActionType(str, enum.Enum):
+    """Kinds of move a buyer can submit (the ``action`` field of BazaarAction and ArenaAction)."""
+    OFFER = "offer"
+    ACCEPT = "accept"
+    WALK = "walk"
+class DealOutcome(str, enum.Enum):
+    """Possible results of a negotiation, as recorded in DealRecord and observations."""
+    DEAL = "deal"
+    WALK = "walk"
+    EXPIRED = "expired"
+class SellerPersonalityType(str, enum.Enum):
+    """Seller personality variants exposed through the API models.
+    The string values match those of ``SellerPersonality`` in seller.py.
+    """
+    DEFAULT = "default"
+    DECEPTIVE = "deceptive"
+    IMPATIENT = "impatient"
+    COLLABORATIVE = "collaborative"
+# ── Tell model (observable signals) ──────────────────────────────
+class TellObservation(BaseModel):
+    """Observable seller tells -- poker/game-theory inspired signals.
+    These are noisy correlates of the seller's hidden state.
+    A smart agent learns to read patterns across rounds.
+    The first twelve fields share names and defaults with the ``SellerTell``
+    dataclass in seller.py; the last three are condition signals.
+    """
+    # Verbal tells (0-1 scores)
+    verbal_urgency: float = 0.0
+    verbal_confidence: float = 0.5
+    verbal_deception_cue: float = 0.0
+    # Price pattern tells (categorical labels)
+    price_rounding: str = "round"
+    offer_speed: str = "normal"
+    concession_pattern: str = "steady"
+    # Body-language proxies
+    fidget_level: float = 0.0
+    eye_contact: str = "steady"
+    posture: str = "neutral"
+    # Meta-tells (patterns across rounds)
+    repeat_phrases: int = 0
+    topic_changes: int = 0
+    emotional_escalation: float = 0.0
+    # Condition/depreciation signals (NLP-extracted from listing text or utterance)
+    condition_score: float = 1.0        # 0=junk, 1=mint
+    depreciation_score: float = 0.0    # 0=none, 1=heavily worn
+    condition_label: str = "unknown"   # new/like_new/very_good/good/acceptable/junk
+class DealRecord(BaseModel):
+    """Summary of a completed negotiation episode.
+    ``episode``, ``outcome`` and ``rounds_taken`` are required; the remaining
+    fields have defaults.
+    """
+    episode: int
+    outcome: DealOutcome
+    agreed_price: Optional[float] = None  # presumably None when no deal closed -- confirm in environment.py
+    rounds_taken: int
+    buyer_surplus: float = 0.0
+    normalized_surplus: float = 0.0
+    buyer_capitulated: bool = False
+class CareerHistory(BaseModel):
+    """Rolling window of past deal outcomes for career mode.
+    Holds the individual DealRecord entries plus aggregate statistics; every
+    field defaults to empty/zero.
+    """
+    deals: list[DealRecord] = Field(default_factory=list)  # fresh list per instance
+    capitulation_rate: float = 0.0
+    avg_normalized_surplus: float = 0.0
+    avg_rounds_to_close: float = 0.0
+    opponent_avg_offer_velocity: float = 0.0
+class BazaarObservation(BaseModel):
+    """What the buyer agent sees each step.
+    Every field has a default, so an empty observation can be constructed
+    with no arguments. Optional fields default to None.
+    """
+    # Round bookkeeping and the latest offers from each side
+    current_round: int = 0
+    max_rounds: int = 8
+    own_last_offer: Optional[float] = None
+    opponent_last_offer: Optional[float] = None
+    own_private_deadline: Optional[int] = None
+    own_private_budget: float = 100.0
+    rounds_remaining: int = 8
+    seller_last_move_delta: Optional[float] = None
+    # Item info
+    item_name: str = "item"
+    seller_asking_price: float = 0.0
+    # Seller personality (visible to buyer)
+    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT
+    # Observable tells
+    tells: Optional[TellObservation] = None
+    # Career history
+    episode_number: int = 1
+    total_episodes: int = 1
+    career_history: Optional[CareerHistory] = None
+    # Status
+    done: bool = False
+    deal_outcome: Optional[DealOutcome] = None
+    message: str = ""
+class BazaarAction(BaseModel):
+    """Buyer's action each step.
+    ``action`` is required; ``price`` is optional and defaults to None.
+    """
+    action: ActionType
+    price: Optional[float] = None
+class BazaarReward(BaseModel):
+    """Reward signal returned each step.
+    ``components`` maps a component name to its float value and starts as a
+    fresh empty dict per instance.
+    """
+    reward: float = 0.0
+    terminal: bool = False
+    components: dict[str, float] = Field(default_factory=dict)
+class TaskConfig(BaseModel):
+    """Configuration for a specific task variant.
+    ``name``, ``difficulty`` and ``description`` are required; everything else
+    has a default.
+    """
+    name: str
+    difficulty: str
+    description: str
+    max_steps: int = 8
+    total_episodes: int = 1
+    buyer_budget: float = 100.0
+    # Seller parameters -- presumably forwarded to SellerState in seller.py
+    # (cost / anchor / concession rate / inventory / BATNA); confirm in environment.py.
+    seller_cost: float = 30.0
+    seller_anchor_multiplier: float = 2.0
+    seller_concession_rate: float = 0.08
+    buyer_deadline: Optional[int] = None
+    seller_inventory: int = 1
+    seller_batna_probability: float = 0.1
+    enable_career: bool = False
+    success_threshold: float = 0.3
+    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT
+    enable_tells: bool = True
+    # NLP tell extraction via Ollama (disable during fast GRPO rollouts)
+    enable_nlp: bool = False
+    # Multi-buyer mode
+    num_buyers: int = 1
+    enable_coalition: bool = False
+    # Sample item + prices from data/amazon.csv per episode instead of the
+    # hardcoded 10-item list.  Buyer budget / seller cost become per-episode.
+    use_real_listings: bool = False
+class EnvironmentState(BaseModel):
+    """Full serializable state for state() endpoint.
+    The first nine fields are required; personality, histories and the
+    cumulative reward have defaults.
+    """
+    task_name: str
+    episode: int
+    total_episodes: int
+    current_round: int
+    max_rounds: int
+    done: bool
+    buyer_budget: float
+    seller_cost: float
+    seller_anchor: float
+    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT
+    offer_history: list[dict] = Field(default_factory=list)  # untyped dict entries; schema not defined here
+    career_history: Optional[CareerHistory] = None
+    cumulative_reward: float = 0.0
+    tells_history: list[TellObservation] = Field(default_factory=list)
+# ── Multi-buyer models ──────────────────────────────────────────
+class BuyerIdentity(BaseModel):
+    """Identity of a buyer in multi-buyer mode.
+    Only ``buyer_id`` is required.
+    """
+    buyer_id: str
+    name: str = "Buyer"
+    is_human: bool = False
+class ArenaAction(BaseModel):
+    """Action in multi-buyer arena.
+    Same shape as BazaarAction plus the acting ``buyer_id`` and an optional
+    coalition ``signal``.
+    """
+    buyer_id: str
+    action: ActionType
+    price: Optional[float] = None
+    # Coalition signals (visible to other buyers)
+    signal: Optional[str] = None  # "cooperate", "compete", "bluff"
+class ArenaObservation(BaseModel):
+    """What a buyer sees in multi-buyer mode.
+    Wraps that buyer's own single-buyer observation (``negotiation``) together
+    with information about the other buyers and the seller's attention.
+    """
+    buyer_id: str
+    negotiation: BazaarObservation
+    # What other buyers are doing (imperfect info)
+    other_buyers_visible: list[dict] = Field(default_factory=list)
+    # Coalition state
+    coalition_signals: list[dict] = Field(default_factory=list)
+    # Market info
+    seller_attention: str = "you"  # who the seller is currently focused on
+class ArenaState(BaseModel):
+    """Full state of a multi-buyer arena.
+    Only ``arena_id`` is required. ``winner`` and ``deal_price`` default to None.
+    """
+    arena_id: str
+    buyers: list[BuyerIdentity] = Field(default_factory=list)
+    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT
+    current_round: int = 0
+    max_rounds: int = 12
+    done: bool = False
+    # Per-buyer negotiation states
+    buyer_states: dict[str, dict] = Field(default_factory=dict)  # presumably keyed by buyer_id -- confirm
+    winner: Optional[str] = None
+    deal_price: Optional[float] = None
+# ── Leaderboard models ──────────────────────────────────────────
+class LeaderboardEntry(BaseModel):
+    """One leaderboard row: an agent's score on a task.
+    All fields except ``metadata`` are required.
+    """
+    agent_name: str
+    task: str
+    score: float
+    episodes_completed: int
+    timestamp: str  # stored as a string; format is not fixed by this model
+    metadata: dict = Field(default_factory=dict)
+class LeaderboardResponse(BaseModel):
+    """Leaderboard payload: a list of entries plus a ``total`` count."""
+    entries: list[LeaderboardEntry] = Field(default_factory=list)
+    total: int = 0
+# ── Counterfactual models ───────────────────────────────────────
+class CounterfactualRequest(BaseModel):
+    """Request to replay from a decision point with a different action.
+    ``from_round`` and ``alternative_action`` are required; ``session_id``
+    defaults to "default".
+    """
+    session_id: str = "default"
+    from_round: int
+    alternative_action: ActionType
+    alternative_price: Optional[float] = None
+class CounterfactualResult(BaseModel):
+    """Result of a counterfactual replay.
+    Pairs the original outcome/price/score with the counterfactual ones; all
+    fields have defaults.
+    """
+    original_outcome: Optional[DealOutcome] = None
+    original_price: Optional[float] = None
+    original_score: float = 0.0
+    counterfactual_outcome: Optional[DealOutcome] = None
+    counterfactual_price: Optional[float] = None
+    counterfactual_score: float = 0.0
+    divergence_round: int = 0
+    counterfactual_history: list[dict] = Field(default_factory=list)

bazaarbot_env/seller.py ADDED Viewed

	@@ -0,0 +1,437 @@

+"""Rule-based seller opponent for BazaarBot.
+The seller is a credible counterparty with configurable personality types:
+Personalities:
+- **default**: Balanced anchoring, moderate concession
+- **deceptive**: Bluffs about demand/inventory, anchors higher, fakes urgency
+- **impatient**: Reverses time pressure onto buyer, concedes fast but walks fast
+- **collaborative**: Seeks fair deals, concedes to midpoint faster, builds rapport
+Game-theory / poker inspired "tells":
+- Each personality leaks observable signals that a smart agent can read
+- Tells are noisy -- they correlate with hidden state but aren't deterministic
+"""
+from __future__ import annotations
+import enum
+import math
+import random
+from dataclasses import dataclass, field
+class SellerPersonality(str, enum.Enum):
+    """Seller personality variants; each one is described in the module docstring."""
+    DEFAULT = "default"
+    DECEPTIVE = "deceptive"
+    IMPATIENT = "impatient"
+    COLLABORATIVE = "collaborative"
+# ── Tell system ──────────────────────────────────────────────────
+@dataclass
+class SellerTell:
+    """Observable signal that leaks seller state.
+    Inspired by poker tells -- behavioral patterns that correlate
+    with hidden information (inventory, urgency, BATNA strength).
+    A default-constructed instance is the starting point that
+    ``_compute_tells`` then fills in per personality.
+    """
+    # Verbal tells -- word choices in messages
+    verbal_urgency: float = 0.0       # 0-1: how desperate the language sounds
+    verbal_confidence: float = 0.5    # 0-1: assertiveness of language
+    verbal_deception_cue: float = 0.0 # 0-1: over-justification, filler phrases
+    # Price pattern tells
+    price_rounding: str = "round"     # "round" (multiples of 5/10) vs "precise"
+    offer_speed: str = "normal"       # "instant", "normal", "deliberate" (thinking time proxy)
+    concession_pattern: str = "steady" # "steady", "erratic", "front_loaded", "stalling"
+    # Body language proxy (text-based signals)
+    fidget_level: float = 0.0         # 0-1: nervousness indicators
+    eye_contact: str = "steady"       # "steady", "avoidant", "intense"
+    posture: str = "neutral"          # "neutral", "leaning_in", "leaning_back", "arms_crossed"
+    # Meta-tells (patterns across rounds)
+    repeat_phrases: int = 0           # how many times seller repeats same phrase
+    topic_changes: int = 0            # diversionary tactics count
+    emotional_escalation: float = 0.0 # 0-1: how emotional the seller is getting
+def _compute_tells(
+    seller: "SellerState",
+    buyer_offer: float | None,
+    round_t: int,
+    rng: random.Random,
+) -> SellerTell:
+    """Compute observable tells based on seller hidden state + personality.
+    Tells are noisy signals -- they correlate with ground truth but have
+    variance, so agents must read patterns over multiple rounds.
+    Args:
+        seller: Seller whose personality, inventory pressure, offers and
+            offer history drive the tells. Not modified here.
+        buyer_offer: Accepted for signature symmetry; not read by this function.
+        round_t: Current round number.
+        rng: Source of the Gaussian noise. The number and order of draws
+            depends on the personality branch, so reordering statements
+            changes results for a fixed seed.
+    Returns:
+        A freshly built SellerTell.
+    """
+    personality = seller.personality
+    # Each call draws one Gaussian sample (mean 0, sigma 0.1) from rng.
+    noise = lambda: rng.gauss(0, 0.1)  # noqa: E731
+    # Base urgency from inventory pressure and time
+    true_urgency = seller.inventory_pressure * (round_t / max(seller.max_rounds, 1))
+    # How close to reserve price
+    price_pressure = 0.0
+    if seller.current_offer > 0 and seller.anchor > seller.reserve_price:
+        # 0 when the current offer equals the anchor, 1 when it equals the reserve.
+        price_pressure = 1.0 - (seller.current_offer - seller.reserve_price) / (seller.anchor - seller.reserve_price)
+    # Capped above at 1.0 only; there is no lower clamp here.
+    true_urgency = min(1.0, true_urgency + price_pressure * 0.3)
+    tell = SellerTell()
+    # In every branch, max(0, min(1, ...)) clamps a noisy score into [0, 1].
+    if personality == SellerPersonality.DEFAULT:
+        tell.verbal_urgency = max(0, min(1, true_urgency * 0.6 + noise()))
+        tell.verbal_confidence = max(0, min(1, 0.6 - true_urgency * 0.3 + noise()))
+        tell.fidget_level = max(0, min(1, true_urgency * 0.4 + noise()))
+        tell.eye_contact = "steady" if true_urgency < 0.5 else "avoidant"
+        tell.price_rounding = "round"
+        tell.offer_speed = "normal"
+        tell.concession_pattern = "steady"
+    elif personality == SellerPersonality.DECEPTIVE:
+        # Deceptive sellers INVERT their tells -- act confident when desperate
+        fake_confidence = max(0, min(1, 0.3 + true_urgency * 0.5 + noise()))
+        tell.verbal_urgency = max(0, min(1, 0.1 + noise() * 0.15))  # suppress urgency
+        tell.verbal_confidence = fake_confidence
+        tell.verbal_deception_cue = max(0, min(1, true_urgency * 0.7 + noise()))  # leaks!
+        tell.fidget_level = max(0, min(1, true_urgency * 0.6 + noise()))  # hard to fake
+        tell.eye_contact = "intense"  # overcompensation
+        tell.posture = "leaning_in"   # aggressive posture to mask weakness
+        tell.price_rounding = "precise"  # uses precise numbers to seem authoritative
+        tell.offer_speed = "instant"  # answers too fast (rehearsed)
+        tell.concession_pattern = "erratic"  # jumps around to confuse
+        tell.topic_changes = max(0, int(true_urgency * 3 + rng.gauss(0, 0.5)))
+    elif personality == SellerPersonality.IMPATIENT:
+        # Impatient tells are driven by the round number rather than true_urgency.
+        tell.verbal_urgency = max(0, min(1, 0.4 + round_t * 0.1 + noise()))
+        tell.verbal_confidence = max(0, min(1, 0.7 - round_t * 0.05 + noise()))
+        tell.fidget_level = max(0, min(1, 0.3 + round_t * 0.08 + noise()))
+        tell.eye_contact = "intense" if round_t < 3 else "avoidant"
+        tell.posture = "arms_crossed" if round_t > 2 else "neutral"
+        tell.offer_speed = "instant"
+        tell.concession_pattern = "front_loaded"  # big drops early, nothing later
+        tell.emotional_escalation = max(0, min(1, round_t * 0.12 + noise()))
+    elif personality == SellerPersonality.COLLABORATIVE:
+        tell.verbal_urgency = max(0, min(1, true_urgency * 0.8 + noise()))  # honest
+        tell.verbal_confidence = max(0, min(1, 0.5 + noise()))
+        tell.verbal_deception_cue = 0.0  # no deception
+        tell.fidget_level = max(0, min(1, true_urgency * 0.3 + noise()))
+        tell.eye_contact = "steady"
+        tell.posture = "leaning_in"  # engaged, not aggressive
+        tell.price_rounding = "round"  # transparent
+        tell.offer_speed = "deliberate"  # thinks carefully
+        tell.concession_pattern = "steady"
+        tell.emotional_escalation = 0.0
+    # Meta-tells: if the seller's last two recorded offers differ by less than
+    # 1.0, flag stalling. This overrides the personality's concession_pattern;
+    # repeat_phrases starts at 0 on the fresh tell, so it becomes at most 1.
+    if len(seller.offer_history) >= 2:
+        last_two = seller.offer_history[-2:]
+        if abs(last_two[0] - last_two[1]) < 1.0:
+            tell.repeat_phrases += 1
+            tell.concession_pattern = "stalling"
+    return tell
+# ── Personality-specific message templates ───────────────────────
+# Maps personality -> category -> list of str.format templates. Every
+# personality defines the same five categories: "open", "counter", "walk",
+# "accept" and "pressure". Placeholders used are {item}, {price} and, in one
+# COLLABORATIVE "open" template only, {cost}; callers must pass every
+# placeholder that the chosen template references.
+_MESSAGES: dict[SellerPersonality, dict[str, list[str]]] = {
+    SellerPersonality.DEFAULT: {
+        "open": [
+            'This {item}? Best quality. {price:.0f} rupees, final price.',
+            '{price:.0f} rupees for this {item}. Very fair.',
+        ],
+        "counter": [
+            '{price:.0f} rupees. That\'s my best offer.',
+            'I can do {price:.0f}. Not lower.',
+            'Okay, {price:.0f}. But that\'s the limit.',
+        ],
+        "walk": [
+            'I have another buyer interested. Good day.',
+            'Sorry, can\'t go that low. Maybe try next stall.',
+        ],
+        "accept": [
+            'Done! {price:.0f} rupees. Good deal for both of us.',
+        ],
+        "pressure": [
+            'Someone else was looking at this earlier...',
+            'This is the last one I have.',
+        ],
+    },
+    SellerPersonality.DECEPTIVE: {
+        "open": [
+            'Ah, this {item}! I just got three offers above {price:.0f}. '
+            'For you, special: {price:.0f} rupees.',
+            'This {item} is selling fast. {price:.0f}, and honestly I\'m losing money at that.',
+        ],
+        "counter": [
+            'My supplier charges me almost that much! {price:.0f} is rock bottom.',
+            '{price:.0f}... you know, I shouldn\'t even go this low. '
+            'My cousin told me someone offered more yesterday.',
+            'Look, I have five people asking about this. {price:.0f}, take it or leave it.',
+        ],
+        "walk": [
+            'Fine, fine. I have better buyers lined up anyway.',
+            'You think about it. I have three others who want this.',
+        ],
+        "accept": [
+            'You\'re killing me! {price:.0f}... okay, but don\'t tell anyone I gave this price.',
+        ],
+        "pressure": [
+            'I\'m actually about to close up for the day...',
+            'Another customer was asking about this just minutes ago.',
+            'My wife says I shouldn\'t sell below cost, but for you...',
+        ],
+    },
+    SellerPersonality.IMPATIENT: {
+        "open": [
+            '{price:.0f} rupees. Quick, I\'m busy.',
+            'This {item}, {price:.0f}. Yes or no?',
+        ],
+        "counter": [
+            '{price:.0f}. Decide now.',
+            'Fine, {price:.0f}. Last offer. I don\'t have all day.',
+            '{price:.0f}. Take it. I have other customers waiting.',
+        ],
+        "walk": [
+            'Too slow. Next customer!',
+            'I don\'t have time for this. Goodbye.',
+        ],
+        "accept": [
+            '{price:.0f}, done. Finally.',
+        ],
+        "pressure": [
+            'Come on, come on. What\'s it going to be?',
+            'I\'ve been standing here too long already.',
+        ],
+    },
+    SellerPersonality.COLLABORATIVE: {
+        "open": [
+            'Welcome! This {item} is lovely, isn\'t it? '
+            'I\'m asking {price:.0f} rupees. What do you think?',
+            'Good to see you! This {item} -- I paid {cost:.0f} for the materials. '
+            'How about {price:.0f}?',
+        ],
+        "counter": [
+            'I understand. How about {price:.0f}? That\'s fair for both of us.',
+            'Let me think... {price:.0f} works. I need to cover my costs, you know.',
+            'You drive a good bargain! {price:.0f} -- that leaves us both happy.',
+        ],
+        "walk": [
+            'I understand, maybe next time. Come back anytime!',
+            'No worries. I hope you find what you\'re looking for.',
+        ],
+        "accept": [
+            '{price:.0f} rupees -- wonderful! I hope you enjoy the {item}.',
+        ],
+        "pressure": [
+            'I\'ll be honest with you -- I need to sell a few more today to cover rent.',
+            'Between you and me, I can be a bit flexible.',
+        ],
+    },
+}
+def _pick_message(
+    personality: SellerPersonality,
+    category: str,
+    rng: random.Random,
+    **kwargs,
+) -> str:
+    templates = _MESSAGES[personality].get(category, _MESSAGES[SellerPersonality.DEFAULT][category])
+    template = rng.choice(templates)
+    return template.format(**kwargs)
+# ── Seller state ─────────────────────────────────────────────────
+@dataclass
+class SellerState:
+    """Mutable state of the rule-based seller for one negotiation.
+    ``reserve_price`` and ``current_offer`` passed to the constructor are
+    overwritten in ``__post_init__`` (derived from ``cost`` and ``anchor``).
+    """
+    cost: float = 30.0
+    anchor: float = 60.0               # opening ask; scaled for some personalities in __post_init__
+    base_concession_rate: float = 0.08
+    inventory: int = 1
+    initial_inventory: int = 1
+    batna_probability: float = 0.1
+    reserve_price: float = 0.0         # recomputed in __post_init__
+    current_offer: float = 0.0         # recomputed in __post_init__
+    round_number: int = 0
+    max_rounds: int = 8
+    buyer_capitulation_rate: float = 0.0
+    offer_history: list[float] = field(default_factory=list)  # the seller's own counteroffers, appended in respond()
+    batna_triggered: bool = False
+    personality: SellerPersonality = SellerPersonality.DEFAULT
+    _rng: random.Random = field(default_factory=random.Random)  # unseeded unless the caller supplies one
+    # Tell tracking
+    last_tell: SellerTell | None = None
+    _pressure_used: int = 0
+    _bluff_count: int = 0
+    def __post_init__(self):
+        self.reserve_price = self.cost * 1.05
+        self.current_offer = self.anchor
+        # Personality adjustments to anchor
+        if self.personality == SellerPersonality.DECEPTIVE:
+            self.anchor *= 1.15  # inflated anchor
+            self.current_offer = self.anchor
+        elif self.personality == SellerPersonality.IMPATIENT:
+            self.max_rounds = max(4, self.max_rounds - 2)  # shorter patience
+        elif self.personality == SellerPersonality.COLLABORATIVE:
+            self.anchor *= 0.9  # lower starting anchor
+            self.current_offer = self.anchor
+            self.reserve_price = self.cost * 1.02  # tighter margins
+    @property
+    def inventory_pressure(self) -> float:
+        if self.initial_inventory <= 1:
+            return 0.5
+        return self.inventory / self.initial_inventory
+    @property
+    def effective_concession_rate(self) -> float:
+        rate = self.base_concession_rate
+        # Personality modifiers
+        if self.personality == SellerPersonality.DECEPTIVE:
+            rate *= 0.7  # concedes less (anchored higher)
+        elif self.personality == SellerPersonality.IMPATIENT:
+            rate *= 1.5  # concedes fast but walks fast
+        elif self.personality == SellerPersonality.COLLABORATIVE:
+            rate *= 1.3  # concedes toward fairness
+        rate *= (1.0 + 0.5 * self.inventory_pressure)
+        rate *= (1.0 - 0.3 * self.buyer_capitulation_rate)
+        return min(rate, 0.25)
+    def compute_counteroffer(self, round_t: int) -> float:
+        t_frac = round_t / max(self.max_rounds, 1)
+        concession = self.effective_concession_rate * round_t
+        offer = self.anchor * (1.0 - concession)
+        # Personality-specific counteroffer adjustments
+        if self.personality == SellerPersonality.DECEPTIVE and self._rng.random() < 0.3:
+            # Occasionally fake a big concession then pull back next round
+            if self._bluff_count < 2:
+                offer *= 0.92  # looks generous
+                self._bluff_count += 1
+        elif self.personality == SellerPersonality.IMPATIENT:
+            # Front-load concessions: big drops early, nothing later
+            if round_t <= 2:
+                offer *= (1.0 - 0.08 * round_t)
+            # After round 2, barely move
+        elif self.personality == SellerPersonality.COLLABORATIVE:
+            # Move toward midpoint between cost and buyer's last offer
+            if self.offer_history and len(self.offer_history) > 0:
+                midpoint = (self.cost * 1.1 + (self.offer_history[-1] if self.offer_history else self.anchor)) / 2
+                offer = offer * 0.7 + midpoint * 0.3
+        offer = max(offer, self.reserve_price)
+        return round(offer, 2)
+    def respond(
+        self, buyer_offer: float | None, round_t: int
+    ) -> tuple[str, float, SellerTell, str]:
+        """Seller's response to a buyer action.
+        Returns (action, price, tell, message):
+            ("counter", price, tell, msg)
+            ("accept", price, tell, msg)
+            ("walk", 0, tell, msg)
+        """
+        self.round_number = round_t
+        # Compute tells BEFORE decision (observable during deliberation)
+        tell = _compute_tells(self, buyer_offer, round_t, self._rng)
+        self.last_tell = tell
+        item = "item"  # will be overridden by environment
+        # Check BATNA
+        batna_threshold = self.batna_probability * (round_t / self.max_rounds)
+        if self.personality == SellerPersonality.IMPATIENT:
+            batna_threshold *= 1.5  # walks sooner
+        elif self.personality == SellerPersonality.COLLABORATIVE:
+            batna_threshold *= 0.3  # rarely walks
+        if self._rng.random() < batna_threshold:
+            if buyer_offer is None or buyer_offer < self.reserve_price * 0.9:
+                self.batna_triggered = True
+                msg = _pick_message(self.personality, "walk", self._rng, item=item, price=0)
+                return ("walk", 0.0, tell, msg)
+        # If buyer made an offer
+        if buyer_offer is not None:
+            # Accept if offer >= current ask
+            accept_threshold = 0.98
+            if self.personality == SellerPersonality.COLLABORATIVE:
+                accept_threshold = 0.95  # more accepting
+            elif self.personality == SellerPersonality.DECEPTIVE:
+                accept_threshold = 1.0  # harder to close
+            if buyer_offer >= self.current_offer * accept_threshold:
+                msg = _pick_message(self.personality, "accept", self._rng,
+                                    item=item, price=buyer_offer)
+                return ("accept", buyer_offer, tell, msg)
+            # Time pressure acceptance
+            time_pressure = round_t / self.max_rounds
+            if buyer_offer >= self.reserve_price and time_pressure > 0.75:
+                accept_prob = (buyer_offer - self.reserve_price) / (self.anchor - self.reserve_price)
+                accept_prob *= time_pressure
+                if self.personality == SellerPersonality.IMPATIENT:
+                    accept_prob *= 1.3
+                elif self.personality == SellerPersonality.DECEPTIVE:
+                    accept_prob *= 0.6
+                if self._rng.random() < accept_prob:
+                    msg = _pick_message(self.personality, "accept", self._rng,
+                                        item=item, price=buyer_offer)
+                    return ("accept", buyer_offer, tell, msg)
+        # Make counteroffer
+        new_offer = self.compute_counteroffer(round_t)
+        if buyer_offer is not None and len(self.offer_history) > 0:
+            last = self.offer_history[-1]
+            midpoint = (new_offer + buyer_offer) / 2
+            new_offer = max(new_offer, midpoint * 0.95)
+            new_offer = max(new_offer, self.reserve_price)
+        # Deceptive: occasionally pull back (raise price)
+        if self.personality == SellerPersonality.DECEPTIVE:
+            if self._bluff_count > 0 and self._rng.random() < 0.25 and self.offer_history:
+                new_offer = max(new_offer, self.offer_history[-1] * 1.03)
+                new_offer = max(new_offer, self.reserve_price)
+                self._bluff_count = 0
+        self.current_offer = round(new_offer, 2)
+        # If our computed counteroffer is at or below the buyer's offer, just accept --
+        # no rational seller counters below what the buyer already offered.
+        if buyer_offer is not None and self.current_offer <= buyer_offer:
+            msg = _pick_message(self.personality, "accept", self._rng,
+                                item=item, price=buyer_offer)
+            return ("accept", buyer_offer, tell, msg)
+        self.offer_history.append(self.current_offer)
+        # Maybe add pressure message
+        pressure_msg = ""
+        if self._rng.random() < 0.2 + (round_t / self.max_rounds) * 0.3:
+            if self.personality == SellerPersonality.DECEPTIVE or self._pressure_used < 2:
+                pressure_msg = " " + _pick_message(self.personality, "pressure", self._rng,
+                                                    item=item, price=self.current_offer)
+                self._pressure_used += 1
+        msg = _pick_message(self.personality, "counter", self._rng,
+                            item=item, price=self.current_offer, cost=self.cost) + pressure_msg
+        return ("counter", self.current_offer, tell, msg)
+    def update_career_info(self, capitulation_rate: float) -> None:
+        """Record the buyer's capitulation rate on this seller instance.
+        Args:
+            capitulation_rate: value stored as ``buyer_capitulation_rate``.
+                NOTE(review): presumably the buyer's historical rate of giving
+                in across earlier deals -- confirm against the caller.
+        """
+        self.buyer_capitulation_rate = capitulation_rate

bazaarbot_env/tasks.py ADDED Viewed

	@@ -0,0 +1,336 @@

+"""Task configurations and graders for BazaarBot."""
+from __future__ import annotations
+from .models import DealOutcome, DealRecord, SellerPersonalityType, TaskConfig
+# ── Task Definitions ──────────────────────────────────────────────
+# Registry of task presets keyed by task name; each key equals the ``name``
+# field of its TaskConfig. GRADERS (below) uses the same keys.
+TASKS: dict[str, TaskConfig] = {
+    "single_deal": TaskConfig(
+        name="single_deal",
+        difficulty="easy",
+        description=(
+            "Buyer negotiates one deal. Symmetric information. No career history. "
+            "Seller concedes at moderate rate."
+        ),
+        max_steps=8,
+        total_episodes=1,
+        buyer_budget=100.0,
+        seller_cost=30.0,
+        seller_anchor_multiplier=2.0,
+        seller_concession_rate=0.08,
+        buyer_deadline=None,
+        seller_inventory=1,
+        seller_batna_probability=0.05,
+        enable_career=False,
+        success_threshold=0.3,
+    ),
+    "asymmetric_pressure": TaskConfig(
+        name="asymmetric_pressure",
+        difficulty="medium",
+        description=(
+            "Buyer has hidden hard deadline at round 5. Seller has hidden inventory pressure. "
+            "Agent must infer seller urgency from offer velocity and close before deadline."
+        ),
+        max_steps=8,
+        total_episodes=1,
+        buyer_budget=100.0,
+        seller_cost=30.0,
+        seller_anchor_multiplier=2.0,
+        seller_concession_rate=0.06,
+        buyer_deadline=5,
+        seller_inventory=5,
+        seller_batna_probability=0.08,
+        enable_career=False,
+        success_threshold=0.4,
+    ),
+    "career_10": TaskConfig(
+        name="career_10",
+        difficulty="hard",
+        description=(
+            "Buyer plays 10 consecutive deals against same seller. Career history active. "
+            "Seller adapts concession rate based on buyer's historical capitulation rate. "
+            "Agent must manage reputation across episodes."
+        ),
+        # 80 steps over 10 episodes: grade_career_10 derives the per-episode
+        # round allowance as max_steps // total_episodes (8 here).
+        max_steps=80,
+        total_episodes=10,
+        buyer_budget=100.0,
+        seller_cost=30.0,
+        seller_anchor_multiplier=2.0,
+        seller_concession_rate=0.07,
+        buyer_deadline=None,
+        seller_inventory=10,
+        seller_batna_probability=0.1,
+        enable_career=True,
+        success_threshold=0.5,
+    ),
+    # ── New personality-based tasks ──────────────────────────────
+    "deceptive_seller": TaskConfig(
+        name="deceptive_seller",
+        difficulty="hard",
+        description=(
+            "Seller bluffs about demand, fakes urgency, anchors 15% higher. "
+            "Tells leak deception cues -- verbal over-justification, fidgeting, "
+            "erratic concessions. Agent must read through the bluffs."
+        ),
+        max_steps=10,
+        total_episodes=1,
+        buyer_budget=100.0,
+        seller_cost=30.0,
+        # NOTE(review): the description promises a 15% higher anchor, but this
+        # multiplier matches the other tasks -- presumably the markup is applied
+        # by the seller personality elsewhere; confirm.
+        seller_anchor_multiplier=2.0,
+        seller_concession_rate=0.06,
+        buyer_deadline=None,
+        seller_inventory=3,
+        seller_batna_probability=0.05,
+        enable_career=False,
+        success_threshold=0.35,
+        seller_personality=SellerPersonalityType.DECEPTIVE,
+        enable_tells=True,
+    ),
+    "impatient_seller": TaskConfig(
+        name="impatient_seller",
+        difficulty="medium",
+        description=(
+            "Seller concedes fast but walks fast. Shorter patience window. "
+            "Agent must close quickly or risk losing the deal. "
+            "Front-loaded concession pattern is the key tell."
+        ),
+        max_steps=8,
+        total_episodes=1,
+        buyer_budget=100.0,
+        seller_cost=30.0,
+        seller_anchor_multiplier=2.0,
+        seller_concession_rate=0.08,
+        buyer_deadline=None,
+        seller_inventory=1,
+        seller_batna_probability=0.15,
+        enable_career=False,
+        success_threshold=0.3,
+        seller_personality=SellerPersonalityType.IMPATIENT,
+        enable_tells=True,
+    ),
+    "collaborative_seller": TaskConfig(
+        name="collaborative_seller",
+        difficulty="easy",
+        description=(
+            "Seller seeks fair deals, concedes toward midpoint. Lower anchor, "
+            "tighter margins. Agent should reciprocate to maximize joint surplus. "
+            "Tests whether agent adapts to cooperative opponents."
+        ),
+        max_steps=8,
+        total_episodes=1,
+        buyer_budget=100.0,
+        seller_cost=30.0,
+        seller_anchor_multiplier=2.0,
+        seller_concession_rate=0.10,
+        buyer_deadline=None,
+        seller_inventory=1,
+        seller_batna_probability=0.02,
+        enable_career=False,
+        success_threshold=0.4,
+        seller_personality=SellerPersonalityType.COLLABORATIVE,
+        enable_tells=True,
+    ),
+    "read_the_tells": TaskConfig(
+        name="read_the_tells",
+        difficulty="expert",
+        description=(
+            "Deceptive seller with strong tells. Agent gets bonus score for "
+            "exploiting tells -- closing below midpoint when deception cues are high "
+            "indicates the agent read the bluff. Game theory meets poker."
+        ),
+        # NOTE(review): if max_steps is an all-episode total (grade_career_10
+        # divides it by total_episodes), 10 steps over 5 episodes leaves only
+        # 2 per episode -- confirm this is intended.
+        max_steps=10,
+        total_episodes=5,
+        buyer_budget=100.0,
+        seller_cost=30.0,
+        seller_anchor_multiplier=2.2,
+        seller_concession_rate=0.05,
+        buyer_deadline=None,
+        seller_inventory=5,
+        seller_batna_probability=0.08,
+        enable_career=True,
+        success_threshold=0.45,
+        seller_personality=SellerPersonalityType.DECEPTIVE,
+        enable_tells=True,
+    ),
+    "marketplace_arena": TaskConfig(
+        name="marketplace_arena",
+        difficulty="expert",
+        description=(
+            "Multi-buyer marketplace: 2-3 buyers compete for the same item from one seller. "
+            "Buyers can signal cooperation or competition. "
+            "Seller plays buyers against each other. Facebook Marketplace dynamics."
+        ),
+        max_steps=12,
+        total_episodes=1,
+        buyer_budget=100.0,
+        seller_cost=30.0,
+        seller_anchor_multiplier=2.0,
+        seller_concession_rate=0.06,
+        buyer_deadline=None,
+        seller_inventory=1,
+        seller_batna_probability=0.05,
+        enable_career=False,
+        success_threshold=0.3,
+        seller_personality=SellerPersonalityType.DEFAULT,
+        enable_tells=True,
+        num_buyers=3,
+        enable_coalition=True,
+    ),
+    "amazon_realistic": TaskConfig(
+        name="amazon_realistic",
+        difficulty="medium",
+        description=(
+            "Single-deal negotiation over a real Amazon listing. Item, MRP, and "
+            "street price sampled per episode from data/amazon.csv. "
+            "Forces generalization across product categories and price magnitudes."
+        ),
+        max_steps=8,
+        total_episodes=1,
+        # buyer_budget / seller_cost are ignored when use_real_listings=True;
+        # kept here as fallbacks if the CSV is missing on the runtime.
+        buyer_budget=1000.0,
+        seller_cost=400.0,
+        seller_anchor_multiplier=2.0,
+        seller_concession_rate=0.08,
+        buyer_deadline=None,
+        seller_inventory=1,
+        seller_batna_probability=0.05,
+        enable_career=False,
+        success_threshold=0.3,
+        seller_personality=SellerPersonalityType.DEFAULT,
+        enable_tells=True,
+        use_real_listings=True,
+    ),
+}
+# ── Graders ───────────────────────────────────────────────────────
+def grade_single_deal(results: list[DealRecord], task: TaskConfig) -> float:
+    if not results:
+        return 0.0
+    deal = results[0]
+    if deal.outcome != DealOutcome.DEAL or deal.agreed_price is None:
+        return 0.0
+    surplus = task.buyer_budget - deal.agreed_price
+    max_surplus = task.buyer_budget - task.seller_cost
+    if max_surplus <= 0:
+        return 0.0
+    score = surplus / max_surplus
+    return max(0.0, min(1.0, score))
+def grade_asymmetric_pressure(results: list[DealRecord], task: TaskConfig) -> float:
+    if not results:
+        return 0.0
+    deal = results[0]
+    if deal.outcome == DealOutcome.WALK:
+        return 0.0
+    if deal.outcome == DealOutcome.EXPIRED:
+        return 0.0
+    if deal.agreed_price is None:
+        return 0.0
+    surplus = task.buyer_budget - deal.agreed_price
+    max_surplus = task.buyer_budget - task.seller_cost
+    surplus_score = max(0.0, surplus / max_surplus) if max_surplus > 0 else 0.0
+    deadline = task.buyer_deadline or 5
+    deadline_bonus = 1.0 if deal.rounds_taken <= deadline else 0.5
+    score = surplus_score * deadline_bonus
+    return max(0.0, min(1.0, score))
+def grade_career_10(results: list[DealRecord], task: TaskConfig) -> float:
+    if not results:
+        return 0.0
+    rounds_per_ep = task.max_steps // task.total_episodes
+    weighted_scores = []
+    for deal in results:
+        if deal.outcome != DealOutcome.DEAL or deal.agreed_price is None:
+            weighted_scores.append(0.0)
+            continue
+        surplus = task.buyer_budget - deal.agreed_price
+        max_surplus = task.buyer_budget - task.seller_cost
+        norm_surplus = max(0.0, surplus / max_surplus) if max_surplus > 0 else 0.0
+        efficiency = max(0.0, 1.0 - (deal.rounds_taken / rounds_per_ep) * 0.3)
+        weighted_scores.append(norm_surplus * efficiency)
+    score = sum(weighted_scores) / max(len(weighted_scores), 1)
+    return max(0.0, min(1.0, score))
+def grade_personality_task(results: list[DealRecord], task: TaskConfig) -> float:
+    """Generic grader for personality tasks -- same as single_deal but per-episode mean."""
+    if not results:
+        return 0.0
+    scores = []
+    for deal in results:
+        if deal.outcome != DealOutcome.DEAL or deal.agreed_price is None:
+            scores.append(0.0)
+            continue
+        surplus = task.buyer_budget - deal.agreed_price
+        max_surplus = task.buyer_budget - task.seller_cost
+        norm = max(0.0, surplus / max_surplus) if max_surplus > 0 else 0.0
+        scores.append(norm)
+    return max(0.0, min(1.0, sum(scores) / max(len(scores), 1)))
+def grade_read_the_tells(results: list[DealRecord], task: TaskConfig) -> float:
+    """Bonus for reading deception -- closing well below midpoint earns extra."""
+    if not results:
+        return 0.0
+    midpoint = (task.buyer_budget + task.seller_cost) / 2
+    scores = []
+    for deal in results:
+        if deal.outcome != DealOutcome.DEAL or deal.agreed_price is None:
+            scores.append(0.0)
+            continue
+        surplus = task.buyer_budget - deal.agreed_price
+        max_surplus = task.buyer_budget - task.seller_cost
+        norm = max(0.0, surplus / max_surplus) if max_surplus > 0 else 0.0
+        # Bonus for closing below midpoint (reading the bluff)
+        if deal.agreed_price < midpoint:
+            bluff_bonus = 0.15 * ((midpoint - deal.agreed_price) / (midpoint - task.seller_cost))
+            norm = min(1.0, norm + bluff_bonus)
+        scores.append(norm)
+    return max(0.0, min(1.0, sum(scores) / max(len(scores), 1)))
+def grade_amazon_realistic(results: list[DealRecord], task: TaskConfig) -> float:
+    """Grader for real-listing tasks: relies on per-episode normalized_surplus
+    (which uses the seller's episode cost, not the task's default cost)."""
+    if not results:
+        return 0.0
+    deal = results[0]
+    if deal.outcome != DealOutcome.DEAL:
+        return 0.0
+    return max(0.0, min(1.0, deal.normalized_surplus))
+# Maps each task name in TASKS to the function that scores its results.
+# Every grader takes (results, task) and returns a float; tasks without a
+# dedicated grader share grade_personality_task.
+GRADERS = {
+    "single_deal": grade_single_deal,
+    "asymmetric_pressure": grade_asymmetric_pressure,
+    "career_10": grade_career_10,
+    "deceptive_seller": grade_personality_task,
+    "impatient_seller": grade_personality_task,
+    "collaborative_seller": grade_personality_task,
+    "read_the_tells": grade_read_the_tells,
+    "marketplace_arena": grade_personality_task,
+    "amazon_realistic": grade_amazon_realistic,
+}

nlp/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .extractor import TellExtractor
2	+
3	+ __all__ = ["TellExtractor"]

nlp/eval_extractor.py ADDED Viewed

	@@ -0,0 +1,162 @@

+"""Evaluate the NLP tell extractor against Chicago HAI human-labeled rows.
+Compares ministral-3:3b zero-shot extraction to Chicago HAI ground-truth labels:
+- firm_soft → verbal_confidence (binary: f=high, s=low)
+- external_incentive=y → verbal_deception_cue (claim of outside pressure)
+- category → loosely informs urgency/confidence
+Also runs a rule-based control baseline (no LLM) for comparison.
+Logs to runs/{ts}_extractor_eval/ via RunLogger.
+Usage:
+    PYTHONPATH=. .venv/bin/python nlp/eval_extractor.py [--n 500] [--model ministral-3:3b]
+"""
+import argparse
+import json
+import pathlib
+import time
+from collections import defaultdict
+from nlp.extractor import TellExtractor, _condition_from_text, DEFAULT_TELL
+from utils.run_logger import RunLogger
+# JSONL file of labeled rows read by load_labeled(). The path is relative, so
+# it resolves against the current working directory.
+LABELED_ROWS = pathlib.Path("nlp/data/chicago_hai_bargaining.jsonl")
+def load_labeled(min_len: int = 10) -> list[dict]:
+    """Load Chicago HAI rows that have at least one human label."""
+    rows = []
+    with open(LABELED_ROWS) as f:
+        for line in f:
+            r = json.loads(line)
+            has_label = bool(r["category"] or r["firm_soft"] or r["external_incentive"])
+            if has_label and len(r["utterance"]) >= min_len:
+                rows.append(r)
+    return rows
+def rule_based_predict(utterance: str) -> dict:
+    """Control baseline: condition rules only, defaults elsewhere."""
+    cond_score, dep_score, cond_label = _condition_from_text(utterance)
+    out = dict(DEFAULT_TELL)
+    out["condition_score"] = cond_score
+    out["depreciation_score"] = dep_score
+    out["condition_label"] = cond_label
+    return out
+def score_row(predicted: dict, gold: dict, row: dict) -> dict:
+    """Per-row scoring against Chicago HAI labels."""
+    out = {
+        "abs_err_urgency": abs(predicted["verbal_urgency"] - gold["verbal_urgency"]),
+        "abs_err_confidence": abs(predicted["verbal_confidence"] - gold["verbal_confidence"]),
+        "abs_err_deception": abs(predicted["verbal_deception_cue"] - gold["verbal_deception_cue"]),
+    }
+    # Binary firm/soft accuracy: gold confidence ≥ 0.5 = firm
+    if row["firm_soft"]:
+        gold_firm = row["firm_soft"] == "f"
+        pred_firm = predicted["verbal_confidence"] >= 0.5
+        out["firm_correct"] = int(gold_firm == pred_firm)
+    # External incentive (deception) recall: gold y → pred deception ≥ 0.4
+    if row["external_incentive"] == "y":
+        out["deception_flagged"] = int(predicted["verbal_deception_cue"] >= 0.4)
+    return out
+def aggregate(per_row: list[dict]) -> dict:
+    """Roll up per-row scores into a summary."""
+    sums = defaultdict(list)
+    for r in per_row:
+        for k, v in r.items():
+            if isinstance(v, (int, float)) and not isinstance(v, bool):
+                sums[k].append(v)
+    return {f"mean_{k}": round(sum(v) / len(v), 4) for k, v in sums.items() if v}
+def run_pass(rows: list[dict], predict_fn, name: str, log) -> dict:
+    """Run one extraction pass over labeled rows."""
+    print(f"\n[{name}] running on {len(rows)} rows ...")
+    per_row = []
+    t0 = time.time()
+    for i, row in enumerate(rows):
+        try:
+            pred = predict_fn(row["utterance"])
+        except Exception as e:
+            print(f"  ! row {i} failed: {e}")
+            continue
+        scored = score_row(pred, row["tell_supervision"], row)
+        log.metric({
+            **scored,
+            "pass": name,
+            "row_idx": i,
+            "utterance_preview": row["utterance"][:80],
+        })
+        per_row.append(scored)
+        if (i + 1) % 50 == 0:
+            elapsed = time.time() - t0
+            rate = (i + 1) / elapsed
+            eta = (len(rows) - i - 1) / rate
+            print(f"  [{i+1}/{len(rows)}]  {rate:.2f} rows/s  ETA {eta:.0f}s")
+    elapsed = time.time() - t0
+    print(f"[{name}] done in {elapsed:.1f}s")
+    summary = aggregate(per_row)
+    summary["n"] = len(per_row)
+    summary["elapsed_s"] = round(elapsed, 1)
+    return summary
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--n", type=int, default=500, help="Cap on labeled rows")
+    parser.add_argument("--model", type=str, default="ministral-3:3b")
+    parser.add_argument("--seed", type=int, default=42)
+    args = parser.parse_args()
+    import random
+    random.seed(args.seed)
+    rows = load_labeled()
+    print(f"Loaded {len(rows)} labeled rows from Chicago HAI")
+    if args.n and args.n < len(rows):
+        rows = random.sample(rows, args.n)
+        print(f"Sampled {args.n} rows for eval")
+    extractor = TellExtractor(model=args.model)
+    with RunLogger("extractor_eval") as log:
+        log.config({
+            "model": args.model,
+            "n_rows": len(rows),
+            "seed": args.seed,
+            "labeled_source": "chicago_hai_bargaining.jsonl",
+        })
+        rule_summary = run_pass(rows, rule_based_predict, "rule_based", log)
+        ministral_summary = run_pass(rows, extractor.extract, args.model, log)
+        comparison = {
+            "rule_based": rule_summary,
+            args.model: ministral_summary,
+            "deltas": {
+                k.replace("mean_", "delta_"): ministral_summary.get(k, 0) - rule_summary.get(k, 0)
+                for k in rule_summary
+                if k.startswith("mean_") and k in ministral_summary
+            },
+        }
+        log.summary(comparison)
+        print("\n=== SUMMARY ===")
+        print(json.dumps(comparison, indent=2))
+        print(f"\nFull logs: {log.dir}")
+# Run the evaluation only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

nlp/extractor.py ADDED Viewed

	@@ -0,0 +1,326 @@

+"""NLP Tell Extractor — reads seller utterances, returns TellObservation.
+Uses a local Ollama model (default: gemma4:e2b) to extract structured signals
+from free-text seller messages. Output schema matches TellObservation in
+bazaarbot_env/models.py — same fields, same ranges.
+The extractor runs as a post-processing step after the seller speaks. For the
+rule-based seller it's a cross-check; for the LLM seller it's the primary
+tell source.
+Usage:
+    from nlp.extractor import TellExtractor
+    extractor = TellExtractor()
+    tells = extractor.extract("bhai last price hai, kal se badhega", history=[...])
+Standalone test:
+    python nlp/extractor.py
+"""
+from __future__ import annotations
+import json
+import re
+import textwrap
+from typing import Optional
+import requests
+OLLAMA_URL = "http://localhost:11434/api/generate"
+DEFAULT_MODEL = "ministral-3:3b"
+# ── Condition vocabulary ──────────────────────────────────────────
+# eBay-style condition grades, listed best to worst. Each entry is
+# (keywords, condition_score, depreciation_score, label); the keywords mix
+# English and Hinglish phrases and are matched as lowercase substrings by
+# _condition_from_text.
+CONDITION_GRADES: list[tuple[list[str], float, float, str]] = [
+    (
+        ["new", "sealed", "mint", "mib", "mint in box", "brand new", "unused",
+         "box band", "seal pack", "sealed pack", "never opened", "factory sealed"],
+        1.0, 0.0, "new",
+    ),
+    (
+        ["like new", "open box", "barely used", "3 months", "6 months",
+         "thoda use", "thoda sa use", "bilkul sahi", "almost new", "excellent"],
+        0.85, 0.10, "like_new",
+    ),
+    (
+        ["very good", "vgc", "minor scratch", "ek chhota scratch", "small scratch",
+         "light scratch", "minor wear", "slight", "good condition"],
+        0.70, 0.25, "very_good",
+    ),
+    (
+        ["good", "guc", "some scratches", "few scratches", "normal wear",
+         "works perfectly", "fully functional", "theek kaam", "sahi kaam"],
+        0.55, 0.40, "good",
+    ),
+    (
+        ["acceptable", "heavy scratch", "dent", "battery low", "battery thodi kam",
+         "screen crack", "needs repair", "rough", "worn", "purana hai"],
+        0.35, 0.60, "acceptable",
+    ),
+    (
+        ["for parts", "broken", "dead", "not working", "kharab", "kaam nahi karta",
+         "damaged", "junk"],
+        0.10, 0.90, "junk",
+    ),
+]
+def _condition_from_text(text: str) -> tuple[float, float, str]:
+    """Rule-based fast pass for condition signals before LLM extraction."""
+    lower = text.lower()
+    for keywords, score, dep, label in CONDITION_GRADES:
+        for kw in keywords:
+            if kw in lower:
+                return score, dep, label
+    return 1.0, 0.0, "unknown"
+# ── Hinglish few-shot examples for the extractor prompt ──────────
+# Each example pairs a seller utterance with the JSON tell object the model is
+# expected to emit for it. Interpolated into EXTRACTION_SYSTEM_PROMPT.
+HINGLISH_FEW_SHOTS = """
+Utterance: "bhai last price hai, kal se price badhega"
+Tells: {"verbal_urgency": 0.75, "verbal_confidence": 0.6, "verbal_deception_cue": 0.5, "offer_speed": "instant", "concession_pattern": "stalling", "emotional_escalation": 0.3, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}
+Utterance: "ek chhota sa scratch hai screen pe, baaki sab bilkul theek hai"
+Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.6, "verbal_deception_cue": 0.2, "offer_speed": "deliberate", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 0.7, "depreciation_score": 0.25, "condition_label": "very_good"}
+Utterance: "abhi teen aur log dekh rahe hain, aaj hi lena padega"
+Tells: {"verbal_urgency": 0.8, "verbal_confidence": 0.7, "verbal_deception_cue": 0.75, "offer_speed": "instant", "concession_pattern": "stalling", "emotional_escalation": 0.4, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}
+Utterance: "battery thodi kam hai, 79% hai, magar charger ke saath deta hoon"
+Tells: {"verbal_urgency": 0.2, "verbal_confidence": 0.5, "verbal_deception_cue": 0.1, "offer_speed": "deliberate", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 0.35, "depreciation_score": 0.6, "condition_label": "acceptable"}
+Utterance: "box band hai, seal packed, maine khola bhi nahi"
+Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.8, "verbal_deception_cue": 0.0, "offer_speed": "normal", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "new"}
+""".strip()
+# English few-shot examples in the same Utterance/Tells format, interpolated
+# into EXTRACTION_SYSTEM_PROMPT under "Examples (English)".
+CHICAGO_FEW_SHOTS = """
+Utterance: "I have another buyer coming in an hour, this is my final offer"
+Tells: {"verbal_urgency": 0.7, "verbal_confidence": 0.75, "verbal_deception_cue": 0.65, "offer_speed": "instant", "concession_pattern": "stalling", "emotional_escalation": 0.3, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}
+Utterance: "Minor scratches on the back, fully functional, battery health 81%"
+Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.6, "verbal_deception_cue": 0.15, "offer_speed": "deliberate", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 0.55, "depreciation_score": 0.4, "condition_label": "good"}
+Utterance: "Okay fine, I can do 4500, but that is absolutely the lowest I'll go"
+Tells: {"verbal_urgency": 0.5, "verbal_confidence": 0.55, "verbal_deception_cue": 0.3, "offer_speed": "deliberate", "concession_pattern": "front_loaded", "emotional_escalation": 0.35, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}
+Utterance: "MIB, never opened, still has the plastic wrap on it"
+Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.85, "verbal_deception_cue": 0.0, "offer_speed": "normal", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "new"}
+""".strip()
+EXTRACTION_SYSTEM_PROMPT = textwrap.dedent(f"""\
+    You extract structured negotiation signals from a seller's message.
+    Output ONLY a single valid JSON object. No prose, no markdown, no explanation.
+    Output schema (all fields required):
+    {{
+      "verbal_urgency": <0.0–1.0, how desperate/pressured the seller sounds>,
+      "verbal_confidence": <0.0–1.0, how assertive/firm the seller sounds>,
+      "verbal_deception_cue": <0.0–1.0, signs of bluffing: social proof claims, fake scarcity, over-justification>,
+      "offer_speed": <"instant"|"normal"|"deliberate">,
+      "concession_pattern": <"steady"|"front_loaded"|"stalling"|"erratic">,
+      "emotional_escalation": <0.0–1.0, how emotionally charged the message is>,
+      "condition_score": <0.0–1.0, item condition from 0=junk to 1=mint. 1.0 if no condition info>,
+      "depreciation_score": <0.0–1.0, wear/damage level. 0.0 if no condition info>,
+      "condition_label": <"new"|"like_new"|"very_good"|"good"|"acceptable"|"junk"|"unknown">
+    }}
+    Calibration rules:
+    - Social proof ("another buyer", "3 log dekh rahe", "bahut demand hai") → verbal_deception_cue ≥ 0.6
+    - "Final price", "last offer", "bilkul nahi jaaunga" → verbal_confidence ≥ 0.7
+    - Time pressure claims ("kal se badhega", "aaj hi") → verbal_urgency ≥ 0.65
+    - Condition disclosures lower condition_score from 1.0; no disclosure = keep 1.0
+    - "Firm" language = verbal_confidence ≥ 0.75; "Soft/flexible" = ≤ 0.35
+    Examples (Hinglish):
+    {HINGLISH_FEW_SHOTS}
+    Examples (English):
+    {CHICAGO_FEW_SHOTS}
+""")
+# Fallback tell values. Used as the base for every extraction result and
+# returned unchanged when the model output cannot be parsed.
+DEFAULT_TELL = {
+    "verbal_urgency": 0.2,
+    "verbal_confidence": 0.5,
+    "verbal_deception_cue": 0.0,
+    "offer_speed": "normal",
+    "concession_pattern": "steady",
+    "emotional_escalation": 0.0,
+    "condition_score": 1.0,
+    "depreciation_score": 0.0,
+    "condition_label": "unknown",
+}
+# Allowed values for the categorical fields; _parse_extraction replaces
+# anything outside these sets with the field's default.
+VALID_OFFER_SPEEDS = {"instant", "normal", "deliberate"}
+VALID_CONCESSION_PATTERNS = {"steady", "front_loaded", "stalling", "erratic"}
+VALID_CONDITION_LABELS = {"new", "like_new", "very_good", "good", "acceptable", "junk", "unknown"}
+def _clamp(v, lo=0.0, hi=1.0) -> float:
+    try:
+        return max(lo, min(hi, float(v)))
+    except (TypeError, ValueError):
+        return (lo + hi) / 2
+def _parse_extraction(raw: str) -> dict:
+    """Parse JSON from LLM output, clamp ranges, fill missing fields."""
+    s = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
+    if "```" in s:
+        parts = s.split("```")
+        s = parts[1].lstrip("json").strip() if len(parts) >= 2 else s
+    start, end = s.find("{"), s.rfind("}") + 1
+    if start >= 0 and end > start:
+        s = s[start:end]
+    try:
+        parsed = json.loads(s)
+    except Exception:
+        return dict(DEFAULT_TELL)
+    out = dict(DEFAULT_TELL)
+    out["verbal_urgency"] = _clamp(parsed.get("verbal_urgency", out["verbal_urgency"]))
+    out["verbal_confidence"] = _clamp(parsed.get("verbal_confidence", out["verbal_confidence"]))
+    out["verbal_deception_cue"] = _clamp(parsed.get("verbal_deception_cue", out["verbal_deception_cue"]))
+    out["emotional_escalation"] = _clamp(parsed.get("emotional_escalation", out["emotional_escalation"]))
+    out["condition_score"] = _clamp(parsed.get("condition_score", out["condition_score"]))
+    out["depreciation_score"] = _clamp(parsed.get("depreciation_score", out["depreciation_score"]))
+    speed = parsed.get("offer_speed", "normal")
+    out["offer_speed"] = speed if speed in VALID_OFFER_SPEEDS else "normal"
+    pattern = parsed.get("concession_pattern", "steady")
+    out["concession_pattern"] = pattern if pattern in VALID_CONCESSION_PATTERNS else "steady"
+    label = parsed.get("condition_label", "unknown")
+    out["condition_label"] = label if label in VALID_CONDITION_LABELS else "unknown"
+    return out
+class TellExtractor:
+    """Extracts TellObservation fields from seller free text via Ollama."""
+    def __init__(self, model: str = DEFAULT_MODEL, ollama_url: str = OLLAMA_URL):
+        self.model = model
+        self.ollama_url = ollama_url
+    def _call_ollama(self, prompt: str) -> str:
+        payload = {
+            "model": self.model,
+            "prompt": prompt,
+            "stream": False,
+            "options": {"temperature": 0.1, "num_predict": 256},
+        }
+        try:
+            resp = requests.post(self.ollama_url, json=payload, timeout=120)
+            resp.raise_for_status()
+            return resp.json().get("response", "")
+        except Exception as e:
+            print(f"[extractor] Ollama call failed: {e}")
+            return ""
+    def extract(
+        self,
+        message: str,
+        history: Optional[list[str]] = None,
+        fast: bool = False,
+    ) -> dict:
+        """Extract tell signals from a seller utterance.
+        Args:
+            message: the seller's current utterance
+            history: last N turns as strings (for context)
+            fast: if True, skip LLM and use rule-based condition extraction only
+                  (useful during GRPO rollouts where latency matters)
+        Returns:
+            dict matching TellObservation field names
+        """
+        # Fast path: rule-based condition extraction, defaults for everything else
+        cond_score, dep_score, cond_label = _condition_from_text(message)
+        if fast:
+            result = dict(DEFAULT_TELL)
+            result["condition_score"] = cond_score
+            result["depreciation_score"] = dep_score
+            result["condition_label"] = cond_label
+            return result
+        history_block = ""
+        if history:
+            recent = history[-3:]
+            history_block = "\nRecent conversation:\n" + "\n".join(recent) + "\n"
+        user_prompt = (
+            f"{history_block}"
+            f'\nSeller says: "{message}"\n\n'
+            "Extract tells as JSON:"
+        )
+        full_prompt = EXTRACTION_SYSTEM_PROMPT + "\n\n" + user_prompt
+        raw = self._call_ollama(full_prompt)
+        if not raw:
+            result = dict(DEFAULT_TELL)
+            result["condition_score"] = cond_score
+            result["depreciation_score"] = dep_score
+            result["condition_label"] = cond_label
+            return result
+        result = _parse_extraction(raw)
+        # Rule-based condition always wins over LLM for condition fields.
+        # Keyword matching on explicit condition phrases ("minor scratches",
+        # "box band", "MIB") is more reliable than LLM inference for this
+        # narrow vocabulary. LLM is better at urgency/deception where context
+        # and tone matter more than keyword lookup.
+        if cond_label != "unknown":
+            result["condition_score"] = cond_score
+            result["depreciation_score"] = dep_score
+            result["condition_label"] = cond_label
+        return result
+    def batch_extract(self, messages: list[str]) -> list[dict]:
+        return [self.extract(m) for m in messages]
+# ── Standalone test ───────────────────────────────────────────────
+# Sample seller utterances (Hinglish and English) that are pushed through the
+# extractor when this module is run as a script. The comment above each entry
+# names the kind of tell it is meant to exercise.
+TEST_UTTERANCES = [
+    # Hinglish urgency + social proof (deceptive)
+    "bhai last price hai, abhi teen aur log dekh rahe hain",
+    # Hinglish condition disclosure
+    "ek chhota sa scratch hai screen pe, battery 81% hai, baaki sab theek",
+    # Hinglish sealed
+    "box band hai, seal packed, maine kabhi khola nahi",
+    # English deceptive pressure
+    "I have another buyer coming in an hour, this is my absolute final offer",
+    # English condition
+    "Minor scratches on the back panel, fully functional, screen is perfect",
+    # English collaborative
+    "Look, I'll be honest with you — I paid 8000 for it, I just need 6500 to break even",
+    # eBay lingo
+    "MIB, never opened, still has factory seal",
+    # Impatient
+    "6000. Yes or no. I don't have all day.",
+]
+if __name__ == "__main__":
+    extractor = TellExtractor()
+    print(f"Using model: {extractor.model}\n")
+    print("=" * 60)
+    for utt in TEST_UTTERANCES:
+        print(f"Utterance: {utt}")
+        tells = extractor.extract(utt)
+        print(f"  urgency={tells['verbal_urgency']:.2f}  "
+              f"confidence={tells['verbal_confidence']:.2f}  "
+              f"deception={tells['verbal_deception_cue']:.2f}  "
+              f"speed={tells['offer_speed']}")
+        print(f"  condition={tells['condition_label']}  "
+              f"score={tells['condition_score']:.2f}  "
+              f"depreciation={tells['depreciation_score']:.2f}")
+        print()

nlp/fetch_datasets.py ADDED Viewed

	@@ -0,0 +1,361 @@

+"""Fetch and cache negotiation datasets used for NLP extractor supervision.
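+Datasets:
+  1. stanfordnlp/craigslist_bargains  — read from local CodaLab dumps (data/train.json, data/dev.json);
+                                         the dumps carry no per-turn intent labels, so tells are
+                                         derived from each event's action type
+  2. ChicagoHAI/language-of-bargaining — read from local negotiations_public_release/nl/;
+                                         per-turn bargaining act + Firm/Soft + External Incentive
+  3. casino                            — downloaded from the Hugging Face Hub; multi-issue strategy annotations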
+Run:
+    python nlp/fetch_datasets.py
+Outputs written to nlp/data/:
+    craigslist_bargains.jsonl
+    chicago_hai_bargaining.jsonl
+    casino.jsonl
+    extractor_supervision.jsonl   ← merged supervision set for NLP extractor fine-tune
+"""
+import json
+import pathlib
+from datasets import load_dataset
+OUT = pathlib.Path(__file__).parent / "data"
+OUT.mkdir(exist_ok=True)
+# ── Chicago HAI: Category → verbal tell mapping ───────────────────
+# Derived from ACL 2023 paper taxonomy
+# NOTE(review): within this module fetch_chicago_hai() does not read this map;
+# it defines its own CATEGORY_MAP keyed by the single-letter codes found in the
+# released files. Confirm whether anything else imports this before removing.
+CHICAGO_CATEGORY_MAP = {
+    "offer":             {"verbal_urgency": 0.2, "verbal_confidence": 0.7},
+    "counter-offer":     {"verbal_urgency": 0.3, "verbal_confidence": 0.6},
+    "accept":            {"verbal_urgency": 0.1, "verbal_confidence": 0.8},
+    "reject":            {"verbal_urgency": 0.4, "verbal_confidence": 0.5},
+    "information":       {"verbal_urgency": 0.1, "verbal_confidence": 0.6},
+    "threat":            {"verbal_urgency": 0.7, "verbal_confidence": 0.8},
+    "appeal":            {"verbal_urgency": 0.5, "verbal_confidence": 0.4},
+    "other":             {"verbal_urgency": 0.2, "verbal_confidence": 0.5},
+}
+# Firm/Soft label → verbal_confidence value.
+# NOTE(review): likewise not read by fetch_chicago_hai(), which uses a local
+# FIRM_MAP keyed by "f"/"s".
+FIRM_SOFT_MAP = {
+    "Firm": 0.85,
+    "Soft": 0.25,
+    "":     0.5,
+}
+# Non-empty External Incentive = social proof / bluff signal
+# (applied by fetch_chicago_hai() as verbal_deception_cue when the label is "y")
+EXTERNAL_INCENTIVE_DECEPTION = 0.65
+# ── CaSiNo: strategy → tell mapping ──────────────────────────────
+# CaSiNo annotates with: no-need, self-need, other-need, vouch-fair,
+# showing-concern, no-deal, coordination, empathy, small-talk
+# NOTE(review): fetch_casino() currently looks this map up with an empty
+# strategy label, so none of these entries is applied yet.
+CASINO_STRATEGY_MAP = {
+    "no-need":          {"verbal_urgency": 0.1, "verbal_deception_cue": 0.3},
+    "self-need":        {"verbal_urgency": 0.6, "verbal_deception_cue": 0.1},
+    "other-need":       {"verbal_urgency": 0.3, "verbal_deception_cue": 0.4},
+    "vouch-fair":       {"verbal_urgency": 0.2, "verbal_confidence": 0.7},
+    "showing-concern":  {"verbal_urgency": 0.3, "verbal_confidence": 0.4},
+    "no-deal":          {"verbal_urgency": 0.5, "verbal_confidence": 0.8},
+    "coordination":     {"verbal_urgency": 0.2, "verbal_confidence": 0.6},
+    "empathy":          {"verbal_urgency": 0.2, "verbal_confidence": 0.5},
+    "small-talk":       {"verbal_urgency": 0.05, "verbal_confidence": 0.5},
+}
+def _default_tell() -> dict:
+    return {
+        "verbal_urgency": 0.2,
+        "verbal_confidence": 0.5,
+        "verbal_deception_cue": 0.0,
+        "condition_score": 1.0,
+        "depreciation_score": 0.0,
+        "condition_label": "unknown",
+    }
+def fetch_craigslist():
+    # Load from local CodaLab downloads: data/train.json + data/dev.json (gzipped)
+    # Source: https://worksheets.codalab.org/worksheets/0x453913e76b65495d8b9730d41c7e0a0c
+    # Schema: events list with action in {message, offer, accept, reject, quit}
+    # No per-turn intent labels — derive from action type
+    import gzip, pathlib
+    ACTION_TELL_MAP = {
+        "message": {"verbal_urgency": 0.2, "verbal_confidence": 0.5},
+        "offer":   {"verbal_urgency": 0.35, "verbal_confidence": 0.7},
+        "accept":  {"verbal_urgency": 0.1, "verbal_confidence": 0.8},
+        "reject":  {"verbal_urgency": 0.45, "verbal_confidence": 0.55},
+        "quit":    {"verbal_urgency": 0.6, "verbal_confidence": 0.6},
+    }
+    rows = []
+    for split in ("train", "dev"):
+        path = pathlib.Path(f"data/{split}.json")
+        if not path.exists():
+            print(f"  ! data/{split}.json not found, skipping")
+            continue
+        print(f"  Loading data/{split}.json ...")
+        try:
+            with gzip.open(path) as f:
+                examples = json.load(f)
+        except Exception:
+            # Try plain JSON if not gzipped
+            examples = json.loads(path.read_text())
+        for ex in examples:
+            kbs = ex.get("scenario", {}).get("kbs", [{}, {}])
+            # agent 0 = buyer (Role in personal), agent 1 = seller
+            agent_roles = {}
+            for kb in kbs:
+                role = kb.get("personal", {}).get("Role", "")
+                # agent index inferred from role
+                if role == "buyer":
+                    agent_roles[0] = "buyer"
+                elif role == "seller":
+                    agent_roles[1] = "seller"
+            outcome = ex.get("outcome", {})
+            deal_price = (outcome.get("offer") or {}).get("price")
+            for ev in ex.get("events", []):
+                action = ev.get("action", "")
+                text = ev.get("data", "")
+                if action != "message" or not isinstance(text, str) or len(text) < 5:
+                    continue
+                agent_idx = ev.get("agent", 0)
+                role = agent_roles.get(agent_idx, "unknown")
+                tell = _default_tell()
+                tell.update(ACTION_TELL_MAP.get(action, {}))
+                # Derive condition signals from listing description if present
+                item = ex.get("scenario", {}).get("kbs", [{}])[0].get("item", {})
+                desc = " ".join(item.get("Description", []) or [])
+                if desc:
+                    try:
+                        from nlp.extractor import _condition_from_text
+                        cond_score, dep_score, cond_label = _condition_from_text(desc)
+                        if cond_label != "unknown":
+                            tell["condition_score"] = cond_score
+                            tell["depreciation_score"] = dep_score
+                            tell["condition_label"] = cond_label
+                    except ImportError:
+                        pass
+                rows.append({
+                    "source": "craigslist_bargains",
+                    "role": role,
+                    "utterance": text,
+                    "action": action,
+                    "deal_price": deal_price,
+                    "tell_supervision": tell,
+                })
+    path = OUT / "craigslist_bargains.jsonl"
+    with open(path, "w") as f:
+        for r in rows:
+            f.write(json.dumps(r, ensure_ascii=False) + "\n")
+    print(f"  → {len(rows)} turns written to {path}")
+    return rows
+    rows = []
+    for split in ("train", "validation"):
+        for ex in ds[split]:
+            utterances = ex["utterance"]
+            acts = ex["dialogue_acts"]
+            roles = [ex["agent_info"]["Role"][t] for t in ex["agent_turn"]]
+            item_price = ex["items"]["Price"][0] if ex["items"]["Price"] else None
+            for i, (utt, role) in enumerate(zip(utterances, roles)):
+                intent = acts["intent"][i] if acts and acts["intent"] else ""
+                price_val = acts["price"][i] if acts and acts["price"] else -1.0
+                tell = _default_tell()
+                if intent == "accept":
+                    tell["verbal_urgency"] = 0.1
+                    tell["verbal_confidence"] = 0.8
+                elif intent == "reject":
+                    tell["verbal_urgency"] = 0.4
+                    tell["verbal_confidence"] = 0.5
+                elif intent == "init-price":
+                    tell["verbal_confidence"] = 0.75
+                rows.append({
+                    "source": "craigslist_bargains",
+                    "role": role,
+                    "utterance": utt,
+                    "intent": intent,
+                    "price": float(price_val) if price_val and price_val != -1.0 else None,
+                    "item_price": float(item_price) if item_price else None,
+                    "tell_supervision": tell,
+                })
+    path = OUT / "craigslist_bargains.jsonl"
+    with open(path, "w") as f:
+        for r in rows:
+            f.write(json.dumps(r, ensure_ascii=False) + "\n")
+    print(f"  → {len(rows)} turns written to {path}")
+    return rows
+def fetch_chicago_hai():
+    # Load from local negotiations_public_release/nl/ — 178 JSON files
+    # HF version is broken; we have the data zip locally already.
+    # Label taxonomy (from data exploration):
+    #   Category: p=price, n=new-offer, c=counter, r=reject, a=accept, e=exit
+    #   Firm or Soft: f=firm, s=soft
+    #   External Incentive: y=yes (social proof / outside pressure claim)
+    import pathlib
+    nl_dir = pathlib.Path("negotiations_public_release/nl")
+    if not nl_dir.exists():
+        print("  ! negotiations_public_release/nl not found, skipping Chicago HAI")
+        return []
+    print(f"Loading Chicago HAI from {nl_dir} ({len(list(nl_dir.glob('*.json')))} files) ...")
+    CATEGORY_MAP = {
+        "p": {"verbal_urgency": 0.3, "verbal_confidence": 0.7},   # price proposal
+        "n": {"verbal_urgency": 0.4, "verbal_confidence": 0.65},  # new offer
+        "c": {"verbal_urgency": 0.35, "verbal_confidence": 0.6},  # counter
+        "r": {"verbal_urgency": 0.5, "verbal_confidence": 0.5},   # reject
+        "a": {"verbal_urgency": 0.1, "verbal_confidence": 0.8},   # accept
+        "e": {"verbal_urgency": 0.6, "verbal_confidence": 0.7},   # exit/walk
+    }
+    FIRM_MAP = {"f": 0.80, "s": 0.25}
+    rows = []
+    for fpath in sorted(nl_dir.glob("*.json")):
+        try:
+            raw = fpath.read_text().replace(": NaN", ": null")
+            d = json.loads(raw)
+        except Exception:
+            continue
+        for turn_words in d.get("turns", []):
+            if not isinstance(turn_words, list) or not turn_words:
+                continue
+            # Reconstruct utterance by joining Word fields
+            utterance = " ".join(
+                w.get("Word", "") for w in turn_words if w.get("Word")
+            ).strip()
+            if len(utterance) < 5:
+                continue
+            role = turn_words[0].get("Role", "")
+            # Take labels from last word that has them (annotation is span-level)
+            category, firm_soft, ext_incentive = "", "", ""
+            for w in reversed(turn_words):
+                if not category and w.get("Category"):
+                    category = str(w["Category"]).strip()
+                if not firm_soft and w.get("Firm or Soft"):
+                    firm_soft = str(w["Firm or Soft"]).strip()
+                if not ext_incentive and w.get("External Incentive"):
+                    ext_incentive = str(w["External Incentive"]).strip()
+            tell = _default_tell()
+            tell.update(CATEGORY_MAP.get(category, {}))
+            if firm_soft in FIRM_MAP:
+                tell["verbal_confidence"] = FIRM_MAP[firm_soft]
+            if ext_incentive == "y":
+                tell["verbal_deception_cue"] = EXTERNAL_INCENTIVE_DECEPTION
+            rows.append({
+                "source": "chicago_hai",
+                "role": role,
+                "utterance": utterance,
+                "category": category,
+                "firm_soft": firm_soft,
+                "external_incentive": ext_incentive,
+                "tell_supervision": tell,
+            })
+    path = OUT / "chicago_hai_bargaining.jsonl"
+    with open(path, "w") as f:
+        for r in rows:
+            f.write(json.dumps(r, ensure_ascii=False) + "\n")
+    print(f"  → {len(rows)} turns written to {path}")
+    return rows
+def fetch_casino():
+    print("Fetching casino (CaSiNo) ...")
+    try:
+        ds = load_dataset("casino", trust_remote_code=True)
+    except Exception as e:
+        print(f"  ! Could not load: {e}")
+        return []
+    rows = []
+    for split in ds.keys():
+        for ex in ds[split]:
+            chat = ex.get("chat_logs", [])
+            for turn in chat:
+                utt = turn.get("text", "")
+                if not utt:
+                    continue
+                role = turn.get("id", "")
+                # CaSiNo per-turn strategy is in annotations, not task_data
+                # task_data contains item allocation info, not strategy labels
+                # Strategy labels are in ex["annotations"] keyed by worker
+                strategy_label = ""
+                tell = _default_tell()
+                sig = CASINO_STRATEGY_MAP.get(strategy_label, {})
+                tell.update(sig)
+                rows.append({
+                    "source": "casino",
+                    "role": role,
+                    "utterance": utt,
+                    "strategy": strategy_label,
+                    "tell_supervision": tell,
+                })
+    path = OUT / "casino.jsonl"
+    with open(path, "w") as f:
+        for r in rows:
+            f.write(json.dumps(r, ensure_ascii=False) + "\n")
+    print(f"  → {len(rows)} turns written to {path}")
+    return rows
+def merge_supervision(craigslist, chicago, casino):
+    """Merge all sources into a single supervision set for extractor training.
+    Each row: {"utterance": str, "tell_supervision": dict}
+    Only rows with non-trivial utterances (len > 10) and non-default tells are kept.
+    """
+    all_rows = craigslist + chicago + casino
+    merged = []
+    for r in all_rows:
+        utt = r.get("utterance", "").strip()
+        tell = r.get("tell_supervision", {})
+        if len(utt) < 10:
+            continue
+        # Keep only rows where at least one tell deviates from defaults
+        non_default = (
+            tell.get("verbal_urgency", 0.2) != 0.2
+            or tell.get("verbal_confidence", 0.5) != 0.5
+            or tell.get("verbal_deception_cue", 0.0) != 0.0
+        )
+        if not non_default:
+            continue
+        merged.append({"utterance": utt, "source": r["source"], "tell_supervision": tell})
+    path = OUT / "extractor_supervision.jsonl"
+    with open(path, "w") as f:
+        for r in merged:
+            f.write(json.dumps(r, ensure_ascii=False) + "\n")
+    print(f"\nMerged supervision set: {len(merged)} rows → {path}")
+if __name__ == "__main__":
+    # Build each per-source file, then the merged supervision set. A source
+    # whose local files are missing (or, for CaSiNo, whose download fails)
+    # simply contributes no rows.
+    craigslist = fetch_craigslist()
+    chicago = fetch_chicago_hai()
+    casino = fetch_casino()
+    merge_supervision(craigslist, chicago, casino)
+    print("\nDone. Run nlp/extractor.py to test extraction against these.")

nlp/keyword_patterns.py ADDED Viewed

	@@ -0,0 +1,149 @@

+"""Keyword/phrase patterns for inline span-level tell highlighting.
+Used by the /highlight endpoint to show users which exact phrases in their
+message triggered which tell signal — Grammarly-style underlining in the
+chat bubble.
+Patterns mined from data/indian_negotiations.jsonl seller turns by strategy.
+Hand-curated and grouped by tell signal:
+    urgency:   "kal se", "abhi", "jaldi", "today only", "final price", "fixed price"
+    deception: "teen aur log", "other buyers looking", "best price"
+    confidence: "market rate", "mushkil hai", "isse kam nahi"
+    condition: "box pack", "scratch", "battery 81%", "bilkul naya"
+Each pattern has:
+    - regex (case-insensitive, word-bounded where useful)
+    - signal it triggers (urgency / deception / confidence / condition)
+    - score it contributes to that signal (0-1); aggregate_signals() keeps the max per signal, not a sum
+    - one-line explanation shown in the hover card
+The frontend uses these to wrap matched spans in <mark> tags.
+"""
+from __future__ import annotations
+import re
+from typing import Literal, NamedTuple
+Signal = Literal["urgency", "deception", "confidence", "condition"]
+class Pattern(NamedTuple):
+    pattern: re.Pattern[str]
+    signal: Signal
+    score: float
+    explanation: str
+    """Human-readable label for the hover card."""
+def _p(regex: str, signal: Signal, score: float, explanation: str) -> Pattern:
+    return Pattern(re.compile(regex, re.IGNORECASE), signal, score, explanation)
+PATTERNS: list[Pattern] = [
+    # ── URGENCY ──────────────────────────────────────────────────
+    _p(r"\bkal\s+se\b", "urgency", 0.6, "Time pressure: 'price changes tomorrow'"),
+    _p(r"\babhi\b(?!\s+nahi)", "urgency", 0.4, "Hindi 'right now' — pushes immediate decision"),
+    _p(r"\bjaldi\b", "urgency", 0.6, "Hindi 'quickly' — explicit urgency"),
+    _p(r"\btoday\s+only\b", "urgency", 0.7, "Time pressure: limited window"),
+    _p(r"\bfinal\s+price\b", "urgency", 0.5, "Anchoring: 'this is final, no negotiation'"),
+    _p(r"\blast\s+price\b", "urgency", 0.5, "Anchoring: claims this is the bottom"),
+    _p(r"\bfix(?:ed)?\s+(?:hai|price)\b", "urgency", 0.4, "Position commitment: 'price is fixed'"),
+    _p(r"\bno\s+(?:more\s+)?negotiation\b", "urgency", 0.7, "Closes the door on further bargaining"),
+    _p(r"\btime\s+waste\b", "urgency", 0.5, "Impatience signal"),
+    _p(r"\bimmediately\b", "urgency", 0.4, "Demands same-instant action"),
+    _p(r"\bsend\s+(?:the\s+)?money\b", "urgency", 0.5, "Pushing toward immediate transaction"),
+    _p(r"\b(?:i'?m|im|i am)\s+(?:making\s+a\s+)?los(?:s|ing)\b", "urgency", 0.55,
+       "Loss-claim sympathy push — pressures buyer to feel bad about price"),
+    _p(r"\bnot?\s+making\s+(?:any\s+)?(?:profit|money)\b", "urgency", 0.5, "No-profit sympathy push"),
+    _p(r"\bbarely\s+breaking\s+even\b", "urgency", 0.5, "Sympathy push: claims zero margin"),
+    # ── DECEPTION ────────────────────────────────────────────────
+    # The classic: "teen aur log dekh rahe" (three other people are looking)
+    _p(r"\bteen\s+aur\s+log\b", "deception", 0.8,
+       "External-incentive bluff: claims multiple competing buyers (CaSiNo deception cue)"),
+    _p(r"\bother\s+(?:people|buyers?)\s+(?:are\s+)?looking\b", "deception", 0.8,
+       "External-incentive bluff: claims competing buyers"),
+    _p(r"\bothers\s+are\s+looking\b", "deception", 0.8, "External-incentive bluff"),
+    _p(r"\bkoi\s+aur\s+(?:buyer|log)\b", "deception", 0.7, "Claims another buyer is interested"),
+    _p(r"\baur\s+log\s+(?:bhi\s+)?dekh\b", "deception", 0.7, "Claims more people watching"),
+    _p(r"\bdemand\s+(?:zyada|high)\b", "deception", 0.4, "Claims market demand to justify price"),
+    _p(r"\bmarket\s+(?:mein\s+)?(?:bahut\s+)?demand\b", "deception", 0.4, "Claims market demand"),
+    _p(r"\bbest\s+price\b", "deception", 0.3, "Self-praise — soft anchoring"),
+    # Numeric "3 other offers" / "two more buyers" — same external-incentive bluff
+    # as "teen aur log" but in English with digits or number-words.
+    _p(r"\b(?:\d+|two|three|four|five|several|multiple|many)\s+(?:other\s+|more\s+)?(?:offers?|buyers?|people|interested)\b",
+       "deception", 0.75, "External-incentive bluff: claims competing offers/buyers"),
+    _p(r"\bgot\s+(?:\d+|two|three|four|five|several|multiple|other)\s+(?:offers?|buyers?)\b",
+       "deception", 0.75, "Claims existing competing offers"),
+    _p(r"\b(?:already\s+)?have\s+(?:\d+|two|three|four|other)\s+(?:offers?|buyers?)\b",
+       "deception", 0.75, "Claims existing competing offers"),
+    # ── CONFIDENCE ───────────────────────────────────────────────
+    _p(r"\bmarket\s+rate\b", "confidence", 0.6, "Confidence: anchoring to external price reference"),
+    _p(r"\bmarket\s+mein\s+iski\b", "confidence", 0.5, "Confidence: market positioning"),
+    _p(r"\bnahi\s+ho(?:\s+payega)?\b", "confidence", 0.6, "Firm refusal: 'won't happen'"),
+    _p(r"\bmushkil\s+hai\b", "confidence", 0.4, "Mild firmness: 'difficult'"),
+    _p(r"\bisse\s+(?:upar|kam)\s+nahi\b", "confidence", 0.7, "Hard floor/ceiling commitment"),
+    _p(r"\bnot?\s+(?:lower|higher)\b", "confidence", 0.6, "Position commitment"),
+    # ── CONDITION ────────────────────────────────────────────────
+    _p(r"\bbox\s+(?:band|pack|sealed?)\b", "condition", 0.95, "Item is sealed / new in box"),
+    _p(r"\bseal\s+packed?\b", "condition", 0.95, "New, factory-sealed"),
+    _p(r"\babhi\s+box\s+se\s+nikala\b", "condition", 0.9, "Just unboxed — like new"),
+    _p(r"\b(?:bilkul\s+)?naya\b", "condition", 0.85, "Hindi 'brand new'"),
+    _p(r"\b(?:like\s+new|mint)\b", "condition", 0.85, "Like-new condition"),
+    _p(r"\bbarely\s+used\b", "condition", 0.8, "Lightly used"),
+    _p(r"\bek\s+(?:chhota\s+)?scratch\b", "condition", 0.55, "Minor scratch — visible wear"),
+    _p(r"\b(?:minor\s+)?scratch(?:es)?\b", "condition", 0.55, "Minor cosmetic damage"),
+    _p(r"\bdent\b", "condition", 0.5, "Dent — moderate wear"),
+    _p(r"\bchip(?:ped)?\b", "condition", 0.5, "Chipped — visible damage"),
+    _p(r"\bscreen\s+(?:replaced|change)\b", "condition", 0.35,
+       "Screen replacement — depreciation indicator"),
+    _p(r"\bbattery\s+(?:health\s+)?(\d{2,3})\s*%?\b", "condition", 0.4,
+       "Battery health disclosure — wear indicator"),
+    _p(r"\b(\d{1,2})\s*(?:saal|year)s?\s+(?:purana|old)\b", "condition", 0.5,
+       "Age disclosure"),
+    _p(r"\bkabhi\s+giraya\s+nahi\b", "condition", 0.85, "Never dropped — careful owner"),
+    _p(r"\boriginal\s+(?:box|charger|warranty)\b", "condition", 0.75,
+       "Has original accessories"),
+    _p(r"\bwarranty\b", "condition", 0.7, "Has warranty"),
+    _p(r"\bperfect\s+condition\b", "condition", 0.85, "Perfect condition claim"),
+    _p(r"\bworking\s+condition\b", "condition", 0.7, "Functional but unspecified wear"),
+]
+class Match(NamedTuple):
+    """One hit of a Pattern inside a message, as produced by find_matches()."""
+    # Character offsets of the hit in the message (regex match start/end).
+    start: int
+    end: int
+    # The exact matched substring.
+    text: str
+    # Signal, score and hover explanation copied from the Pattern that matched.
+    signal: Signal
+    score: float
+    explanation: str
+def find_matches(message: str) -> list[Match]:
+    """Find all pattern matches in `message`. Returns char-offset spans."""
+    matches: list[Match] = []
+    for pat in PATTERNS:
+        for m in pat.pattern.finditer(message):
+            matches.append(
+                Match(
+                    start=m.start(),
+                    end=m.end(),
+                    text=m.group(0),
+                    signal=pat.signal,
+                    score=pat.score,
+                    explanation=pat.explanation,
+                )
+            )
+    return matches
+def aggregate_signals(matches: list[Match]) -> dict[str, float]:
+    """Roll up per-signal max score across matches."""
+    rolled: dict[str, float] = {}
+    for m in matches:
+        rolled[m.signal] = max(rolled.get(m.signal, 0.0), m.score)
+    return rolled

nlp/setup_ministral.sh ADDED Viewed

	@@ -0,0 +1,53 @@

+#!/usr/bin/env bash
+# Waits for the Indian negotiation generator to finish, then:
+#   1. Pulls ministral-3:3b into Ollama
+#   2. Swaps it as the default extractor model
+#   3. Runs the extractor test to verify quality
+#
+# Run with: bash nlp/setup_ministral.sh &
+# Log:       /tmp/setup_ministral.log
+set -euo pipefail
+LOG=/tmp/setup_ministral.log
+TARGET=500
+JSONL=data/indian_negotiations.jsonl
+EXTRACTOR=nlp/extractor.py
+log() { echo "[$(date '+%H:%M:%S')] $*" | tee -a "$LOG"; }
+cd /home/meow/Documents/Projects/MetaThon
+log "Watching generator — waiting for $TARGET conversations in $JSONL ..."
+while true; do
+    if [ -f "$JSONL" ]; then
+        count=$(wc -l < "$JSONL")
+        log "Progress: $count / $TARGET conversations"
+        if [ "$count" -ge "$TARGET" ]; then
+            log "Generator done."
+            break
+        fi
+    else
+        log "Output file not found yet, waiting..."
+    fi
+    # Also stop waiting if the generator process is gone and file exists
+    if [ -f "$JSONL" ] && ! pgrep -f generate_indian_negotiations.py > /dev/null 2>&1; then
+        count=$(wc -l < "$JSONL")
+        log "Generator process ended with $count conversations. Proceeding."
+        break
+    fi
+    sleep 120
+done
+log "Pulling ministral-3:3b ..."
+ollama pull ministral-3:3b 2>&1 | tee -a "$LOG"
+log "Verifying ministral-3:3b is available ..."
+ollama list | tee -a "$LOG"
+log "Running extractor test with ministral-3:3b ..."
+PYTHONPATH=. .venv/bin/python "$EXTRACTOR" 2>&1 | tee -a "$LOG"
+log "All done. Check $LOG for extractor quality results."

nlp/templates.py ADDED Viewed

	@@ -0,0 +1,185 @@

+"""Buyer-message templates for SFT targets and steerer-override fallback.
+Templates are bucketed by:
+  action_kind ∈ {offer_low, offer_mid, offer_high, accept, walk}
+  register    ∈ {firm, soft, polite, curt, final}  — escalating tone
+Use the `turn_index` in render() to bias toward `final` register on later turns
+(round-aware escalation: opening turns sound exploratory, late turns sound terminal).
+The bank mostly avoids "yaar" (informal/casual filler; one `walk` line still uses it) and
+overuse of "bhai", keeping the buyer's voice grounded in Hinglish-leaning English without
+sounding like a street vendor.
+"""
+import random
+from typing import Optional
+# Each entry: (register, template). All offer templates have {price}.
+# Template bank keyed by bucket name. render() picks the bucket from the action
+# (and, for offers, from _bucket_for_offer), then filters entries by register.
+TEMPLATES: dict[str, list[tuple[str, str]]] = {
+    # Offers below 55% of the seller's ask (see _bucket_for_offer).
+    "offer_low": [
+        ("firm",   "{price} max de sakta hu, isse upar nahi"),
+        ("firm",   "{price} mera offer, isse zyada nahi"),
+        ("firm",   "{price} pe baat banegi, warna nahi"),
+        ("firm",   "{price} hai bas, final"),
+        ("soft",   "{price} mein de dijiye please"),
+        ("soft",   "{price} chalega bhai?"),
+        ("soft",   "{price} kar lo, deal pakki"),
+        ("polite", "market mein {price} mein mil jaata hai, dekh lijiye"),
+        ("polite", "honestly bhai, {price} fair lagta hai mujhe"),
+        ("polite", "{price} reasonable hai, condition dekh ke"),
+        ("curt",   "{price}. le ya jaa."),
+        ("curt",   "{price}, last from my side"),
+        ("final",  "okay, {price} mera final offer hai"),
+        ("final",  "{price} ya nahi — beyond this I walk"),
+    ],
+    # Offers from 55% up to (not including) 80% of the ask; also used when the
+    # ask is not positive.
+    "offer_mid": [
+        ("firm",   "{price} pe karte hain deal"),
+        ("firm",   "{price} works for me, lock kar do"),
+        ("firm",   "chalo, {price} pe baat khatam"),
+        ("soft",   "{price} chalega bhai?"),
+        ("soft",   "{price} mein ho jaye?"),
+        ("soft",   "thoda kam karo, {price} pe finalize?"),
+        ("polite", "{price} fair hai dono ke liye"),
+        ("polite", "{price} sahi rate lagta hai mujhe"),
+        ("curt",   "{price}. that's where I am"),
+        ("curt",   "{price}, isse upar nahi"),
+        ("final",  "{price} ya I'm out"),
+        ("final",  "this is my last move — {price}"),
+    ],
+    # Offers at 80% of the ask or more.
+    "offer_high": [
+        ("firm",   "okay, {price} but that's the limit"),
+        ("firm",   "{price}, isse upar nahi ja sakta"),
+        ("soft",   "{price} okay? close kar dete hain"),
+        ("soft",   "fine, {price} mein le leta hu"),
+        ("polite", "{price} stretch kar raha hu, condition fair lagi"),
+        ("polite", "{price} de raha hu since you've been reasonable"),
+        ("curt",   "{price}. done?"),
+        ("curt",   "{price}, last bid"),
+        ("final",  "okay {price} — bas yahi ceiling hai"),
+        ("final",  "{price} pe close ya I walk"),
+    ],
+    # Acceptance lines; no {price} slot.
+    "accept": [
+        ("firm",   "deal."),
+        ("firm",   "done."),
+        ("firm",   "chalo, deal."),
+        ("soft",   "okay, le leta hu"),
+        ("soft",   "theek hai, kar lete hain"),
+        ("polite", "fair, accepted"),
+        ("polite", "sounds good, deal pakki"),
+        ("curt",   "haan."),
+        ("curt",   "ho gaya, done"),
+        ("final",  "deal, close kar dete hain"),
+        ("final",  "okay, isi pe lock"),
+    ],
+    # Walk-away lines; no {price} slot.
+    "walk": [
+        # NOTE(review): this line uses "yaar", which the module docstring says
+        # the bank avoids.
+        ("firm",   "nahi yaar nahi, ye nahi ho payega"),
+        ("firm",   "budget mein nahi aa raha, passing"),
+        ("soft",   "thanks for your time, dekhte hain phir kabhi"),
+        ("soft",   "appreciate it, but is price pe nahi"),
+        ("polite", "gap zyada hai, mujhe pass karna hoga"),
+        ("polite", "respect your floor, but mere liye nahi banega"),
+        ("curt",   "no deal."),
+        ("curt",   "passing, thanks"),
+        ("final",  "bahut difference hai — walking"),
+        ("final",  "is price pe nahi, goodbye"),
+    ],
+}
+def _bucket_for_offer(price: float, ask: float) -> str:
+    """Classify an offer price as low/mid/high relative to seller's ask."""
+    if ask <= 0:
+        return "offer_mid"
+    ratio = price / ask
+    if ratio < 0.55:
+        return "offer_low"
+    if ratio < 0.80:
+        return "offer_mid"
+    return "offer_high"
+def _register_for_turn(turn_index: int, max_turns: int = 8) -> Optional[str]:
+    """Bias register based on turn position.
+    - Turns 0-1 (opening): polite or soft
+    - Turns 2-4 (mid):     firm or soft
+    - Turns 5+  (late):    curt or final
+    """
+    if turn_index < 0:
+        return None
+    progress = turn_index / max(1, max_turns)
+    if progress < 0.25:
+        return random.choice(["polite", "soft"])
+    if progress < 0.65:
+        return random.choice(["firm", "soft"])
+    return random.choice(["curt", "final"])
+def render(
+    action: str,
+    price: Optional[float],
+    ask: Optional[float] = None,
+    intent: Optional[str] = None,
+    turn_index: Optional[int] = None,
+    max_turns: int = 8,
+    used_history: Optional[set[str]] = None,
+    rng: Optional[random.Random] = None,
+) -> str:
+    """Pick a template, render it with the given price, avoid recent repeats.
+    Args:
+        action: 'offer' | 'accept' | 'walk'
+        price: numeric price for offers; None for accept/walk
+        ask: seller's current ask (used to bucket offer price)
+        intent: explicit register override ('firm'|'soft'|'polite'|'curt'|'final')
+        turn_index: current round number — biases register toward 'final' as it grows
+        max_turns: typical episode length used for normalizing turn_index
+        used_history: set of templates already rendered this episode (avoid repeats)
+        rng: optional Random instance for reproducibility
+    Returns:
+        A natural-language line, with {price} slot filled.
+    """
+    rng = rng or random
+    if action == "offer":
+        bucket = _bucket_for_offer(price or 0, ask or (price or 0))
+    elif action == "accept":
+        bucket = "accept"
+    elif action == "walk":
+        bucket = "walk"
+    else:
+        return ""
+    candidates = TEMPLATES.get(bucket, [])
+    if not candidates:
+        return ""
+    # Determine register: explicit > turn-based > random
+    register = intent or (
+        _register_for_turn(turn_index, max_turns) if turn_index is not None else None
+    )
+    register_pool = [(r, t) for r, t in candidates if r == register] if register else list(candidates)
+    if not register_pool:
+        register_pool = list(candidates)
+    def _materialize(tmpl: str) -> str:
+        if "{price}" in tmpl and price is not None:
+            return tmpl.format(price=int(round(price)))
+        return tmpl
+    # `used_history` stores rendered messages, so compare against the materialized form.
+    # Variety > register fidelity when buyer is stuck — widen to all registers
+    # before allowing repeats.
+    if used_history:
+        fresh_in_register = [(r, t) for r, t in register_pool if _materialize(t) not in used_history]
+        if fresh_in_register:
+            pool = fresh_in_register
+        else:
+            fresh_anywhere = [(r, t) for r, t in candidates if _materialize(t) not in used_history]
+            pool = fresh_anywhere or register_pool
+    else:
+        pool = register_pool
+    _, tmpl = rng.choice(pool)
+    return _materialize(tmpl)

server/main.py CHANGED Viewed

@@ -4,10 +4,11 @@ from __future__ import annotations
 import copy
 import json
 from contextlib import asynccontextmanager
 from typing import Optional
-from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import HTMLResponse
 from pydantic import BaseModel
@@ -91,6 +92,20 @@ class ArenaStepRequest(BaseModel):
 # ── App state ─────────────────────────────────────────────────────
 _envs: dict[str, BazaarEnvironment] = {}
 _arenas: dict[str, MultiBuyerArena] = {}
 _ws_connections: dict[str, list[WebSocket]] = {}
@@ -337,6 +352,90 @@ async def health():
     return {"status": "ok", "version": "2.0.0"}
 # ── Simulate (AI auto-play for spectator mode) ──────────────────
 class SimulateRequest(BaseModel):
@@ -356,8 +455,50 @@ class SellerModeStepRequest(BaseModel):
     price: float
-def _ai_buyer_action(obs: BazaarObservation, strategy: str, rng) -> BazaarAction:
-    """Built-in AI buyer strategies for spectator / seller mode."""
     budget = obs.own_private_budget
     ask = obs.seller_asking_price
     opp = obs.opponent_last_offer or ask
@@ -545,15 +686,33 @@ class SellerModeResetRequest(BaseModel):
     strategy: str = "smart"
     seed: Optional[int] = None
     opening_price: float = 60.0
 @app.post("/seller-mode/reset")
-async def seller_mode_reset(req: SellerModeResetRequest):
     """Start a seller-mode session. User plays as seller, AI plays as buyer."""
     if req.task not in TASKS:
         raise HTTPException(status_code=400, detail=f"Unknown task: {req.task}")
     task = copy.deepcopy(TASKS[req.task])
     # Store seller mode state
     import random
     session = {
@@ -562,8 +721,8 @@ async def seller_mode_reset(req: SellerModeResetRequest):
         "rng": random.Random(req.seed),
         "round": 0,
         "max_rounds": task.max_steps if task.total_episodes == 1 else task.max_steps // task.total_episodes,
-        "buyer_budget": task.buyer_budget,
-        "seller_cost": task.seller_cost,
         "current_seller_price": req.opening_price,
         "last_buyer_offer": None,
         "history": [],
@@ -577,17 +736,21 @@ async def seller_mode_reset(req: SellerModeResetRequest):
         current_round=0,
         max_rounds=session["max_rounds"],
         opponent_last_offer=req.opening_price,
-        own_private_budget=task.buyer_budget,
         rounds_remaining=session["max_rounds"],
         seller_asking_price=req.opening_price,
-        item_name="handwoven silk scarf",
         message=f"You open at {req.opening_price:.0f} rupees.",
     )
     # AI buyer makes first offer
-    action = _ai_buyer_action(obs, req.strategy, session["rng"])
     session["round"] = 1
     session["last_buyer_offer"] = action.price
     session["history"].append({
         "round": 0,
         "actor": "seller",
@@ -599,9 +762,10 @@ async def seller_mode_reset(req: SellerModeResetRequest):
         "actor": "buyer",
         "action": action.action.value if hasattr(action.action, 'value') else action.action,
         "price": action.price,
     })
-    buyer_msg = (
         f"Buyer offers {action.price:.0f} rupees."
         if action.action in ("offer", "OFFER", ActionType.OFFER)
         else f"Buyer {action.action}s."
@@ -611,7 +775,8 @@ async def seller_mode_reset(req: SellerModeResetRequest):
         "round": 1,
         "buyer_action": action.action.value if hasattr(action.action, 'value') else action.action,
         "buyer_price": action.price,
-        "message": buyer_msg,
         "your_opening": req.opening_price,
         "history": session["history"],
         "done": False,
@@ -619,7 +784,7 @@ async def seller_mode_reset(req: SellerModeResetRequest):
 @app.post("/seller-mode/step")
-async def seller_mode_step(req: SellerModeStepRequest):
     """User (as seller) sets counteroffer price. AI buyer responds."""
     if "seller_mode" not in _envs:
         raise HTTPException(status_code=400, detail="No seller-mode session. Call /seller-mode/reset first.")
@@ -690,7 +855,8 @@ async def seller_mode_step(req: SellerModeStepRequest):
         }
     # AI buyer responds
-    action = _ai_buyer_action(obs, session["strategy"], session["rng"])
     if action.action in ("accept", ActionType.ACCEPT):
         session["done"] = True
@@ -700,16 +866,20 @@ async def seller_mode_step(req: SellerModeStepRequest):
         max_surplus = session["buyer_budget"] - session["seller_cost"]
         buyer_score = max(0, surplus / max_surplus) if max_surplus > 0 else 0
         session["history"].append({
             "round": rnd,
             "actor": "buyer",
             "action": "accept",
             "price": seller_price,
         })
         return {
             "round": rnd,
-            "message": f"Buyer accepts your price of {seller_price:.0f}! Deal closed.",
             "buyer_action": "accept",
             "buyer_price": seller_price,
             "done": True,
@@ -723,17 +893,21 @@ async def seller_mode_step(req: SellerModeStepRequest):
     elif action.action in ("walk", ActionType.WALK):
         session["done"] = True
         session["outcome"] = "walk"
         session["history"].append({
             "round": rnd,
             "actor": "buyer",
             "action": "walk",
             "price": None,
         })
         return {
             "round": rnd,
-            "message": "Buyer walks away! No deal.",
             "buyer_action": "walk",
             "buyer_price": None,
             "done": True,
@@ -743,16 +917,21 @@ async def seller_mode_step(req: SellerModeStepRequest):
     else:  # offer
         session["last_buyer_offer"] = action.price
         session["history"].append({
             "round": rnd,
             "actor": "buyer",
             "action": "offer",
             "price": action.price,
         })
         return {
             "round": rnd,
-            "message": f"Buyer counters with {action.price:.0f} rupees.",
             "buyer_action": "offer",
             "buyer_price": action.price,
             "done": False,

 import copy
 import json
+import os
 from contextlib import asynccontextmanager
 from typing import Optional
+from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import HTMLResponse
 from pydantic import BaseModel
 # ── App state ─────────────────────────────────────────────────────
+def _client_ip(request: Request) -> Optional[str]:
+    """Return the best-effort client IP used as the per-IP rate-limit key.
+    Resolution order:
+      1. SAUDA_TRUSTED_PROXY_HOPS = N > 0: take the N-th entry from the RIGHT
+         of X-Forwarded-For. Each trusted proxy appends the peer it saw, so
+         that entry is the first one written by infrastructure we control;
+         everything to its left was supplied by the caller and is spoofable.
+      2. SAUDA_TRUSTED_PROXY_HOPS = 0: ignore X-Forwarded-For entirely
+         (direct uvicorn, nothing trusted in front).
+      3. Variable unset, negative, or not an integer: legacy behavior, trust
+         the leftmost X-Forwarded-For entry.
+    Falls back to the direct socket peer, or None when that is unavailable.
+    Note: on the legacy path the returned value is whatever the caller sent,
+    so a client can rotate it to dodge per-IP limits. Set
+    SAUDA_TRUSTED_PROXY_HOPS wherever that matters.
+    """
+    raw_hops = os.environ.get("SAUDA_TRUSTED_PROXY_HOPS", "").strip()
+    try:
+        hops = int(raw_hops) if raw_hops else None
+    except ValueError:
+        hops = None
+    if hops is not None and hops < 0:
+        # A negative hop count is meaningless; treat it like "unset".
+        hops = None
+    xff = request.headers.get("x-forwarded-for")
+    if xff and hops != 0:
+        if hops is None:
+            # Legacy path, unchanged: leftmost entry, or None if it is blank.
+            return xff.split(",")[0].strip() or None
+        entries = [part.strip() for part in xff.split(",") if part.strip()]
+        if entries:
+            # Clamp so a header shorter than the hop count yields its first entry.
+            return entries[max(len(entries) - hops, 0)]
+    return request.client.host if request.client else None
 _envs: dict[str, BazaarEnvironment] = {}
 _arenas: dict[str, MultiBuyerArena] = {}
 _ws_connections: dict[str, list[WebSocket]] = {}
     return {"status": "ok", "version": "2.0.0"}
+# ── Highlight: span-level tell extraction for the /sell page ────
+# Request body for POST /highlight.
+class HighlightRequest(BaseModel):
+    message: str  # seller chat message to scan; passed as-is to find_matches()
+# One matched phrase in a /highlight response. Each field is copied from the
+# attribute of the same name on a match object returned by
+# nlp.keyword_patterns.find_matches().
+class HighlightSpan(BaseModel):
+    start: int  # presumably a character offset into the message — confirm in keyword_patterns
+    end: int  # presumably the matching end offset (inclusive/exclusive not visible here) — confirm
+    text: str  # matched phrase as reported by the matcher
+    signal: str  # signal name assigned by the matcher (e.g. urgency/deception/condition per the endpoint doc)
+    score: float  # matcher-assigned score; scale is not visible from this file
+    explanation: str  # human-readable reason attached by the matcher
+# Response body for POST /highlight.
+class HighlightResponse(BaseModel):
+    spans: list[HighlightSpan]  # one entry per match, in the order find_matches() yields them
+    aggregate: dict[str, float]  # output of aggregate_signals() over the same matches
+@app.post("/highlight", response_model=HighlightResponse)
+async def highlight(req: HighlightRequest):
+    """Locate tell-triggering phrases in a seller message and report their spans.
+    The /sell page calls this after the user sends a chat bubble so it can
+    underline urgency/deception/condition phrases. Matching is pattern-based
+    and deterministic; no LLM is involved, so the response is immediate.
+    """
+    from nlp.keyword_patterns import aggregate_signals, find_matches
+    found = find_matches(req.message)
+    # Copy each match field-by-field into the response schema.
+    spans = []
+    for hit in found:
+        spans.append(
+            HighlightSpan(
+                start=hit.start,
+                end=hit.end,
+                text=hit.text,
+                signal=hit.signal,
+                score=hit.score,
+                explanation=hit.explanation,
+            )
+        )
+    totals = aggregate_signals(found)
+    return HighlightResponse(spans=spans, aggregate=totals)
+@app.get("/sauda/health")
+async def sauda_health(request: Request):
+    """Probe both backends. Used to choose strategy and surface config errors.
+    Public response is intentionally minimal: just a green/red signal.
+    For the full ops view (spend, rate-limit hits, circuit-breaker state),
+    pass the X-Sauda-Admin header matching the SAUDA_ADMIN_TOKEN env-var.
+    Returns:
+        The full health dict for an authenticated admin; otherwise a two-key
+        dict with "status" ("ok"/"degraded") and "live_agent_available".
+    """
+    import hmac
+    from .sauda_buyer import health as _full_health
+    full = _full_health()
+    admin_token = os.environ.get("SAUDA_ADMIN_TOKEN", "").strip()
+    supplied = request.headers.get("x-sauda-admin", "")
+    # Constant-time comparison so response timing does not leak how much of the
+    # token a guess got right. Compare as bytes: compare_digest() raises
+    # TypeError on non-ASCII str, and header values are caller-controlled.
+    # An unset/blank SAUDA_ADMIN_TOKEN never grants admin.
+    is_admin = bool(admin_token) and hmac.compare_digest(
+        supplied.encode("utf-8"), admin_token.encode("utf-8")
+    )
+    if is_admin:
+        return full
+    # Public view: only the bits a UI needs to decide whether the live agent
+    # is reachable. No spend numbers, no IP counts, no circuit breaker state.
+    live = bool(full.get("hf_ok") or full.get("ollama_ok"))
+    return {
+        "status": "ok" if live else "degraded",
+        "live_agent_available": live,
+    }
+@app.get("/sauda/backends")
+async def sauda_backends():
+    """Return the fixed catalogue of buyer backends for the /sell UI dropdown."""
+    # Rows are (id, label, primary, description); list order is the response order.
+    catalogue = (
+        ("sauda", "Sauda v2 (HF Endpoint)", True,
+         "Llama-3.1-8B + SFT+GRPO LoRA, served via HF Inference Endpoint."),
+        ("sauda_ollama", "Sauda v2 (Ollama, local)", False,
+         "Same adapter, served locally via Ollama. Fallback when HF endpoint is unavailable."),
+        ("smart", "Rule-based (smart)", False,
+         "Heuristic baseline. No LLM. Always available."),
+        ("naive", "Rule-based (naive)", False,
+         "Easy buyer for seller-mode warmup."),
+        ("aggressive", "Rule-based (aggressive)", False,
+         "Hard rule-based buyer."),
+    )
+    return {
+        "backends": [
+            {"id": ident, "label": label, "primary": primary, "description": blurb}
+            for ident, label, primary, blurb in catalogue
+        ]
+    }
 # ── Simulate (AI auto-play for spectator mode) ──────────────────
 class SimulateRequest(BaseModel):
     price: float
+def _ai_buyer_action(
+    obs: BazaarObservation,
+    strategy: str,
+    rng,
+    *,
+    client_ip: Optional[str] = None,
+) -> BazaarAction:
+    """Built-in AI buyer strategies for spectator / seller mode.
+    `strategy` values:
+      - "sauda" / "sauda_hf"  → HF Inference Endpoint serving Sauda v2
+      - "sauda_ollama"        → local ollama serving Sauda v2
+      - "smart" / "naive" / "aggressive" → rule-based heuristics (no LLM)
+    `client_ip` is forwarded to the safety layer for per-IP rate-limiting on
+    the metered HF backend; pass None for trusted server-internal callers.
+    """
+    # Live Sauda v2 path (HF endpoint primary, Ollama fallback selectable)
+    if strategy in ("sauda", "sauda_hf", "sauda_ollama"):
+        from .sauda_buyer import sauda_action
+        backend = "ollama" if strategy == "sauda_ollama" else "hf"
+        obs_dict = obs.model_dump() if hasattr(obs, "model_dump") else obs.dict()
+        result = sauda_action(obs_dict, backend=backend, client_ip=client_ip)
+        action_str = result.get("action", "offer")
+        price = result.get("price")
+        msg = result.get("message", "")
+        if action_str == "accept":
+            ba = BazaarAction(action="accept")
+        elif action_str == "walk":
+            ba = BazaarAction(action="walk")
+        else:
+            ba = BazaarAction(action="offer", price=float(price) if price is not None else round((obs.own_private_budget or 100) * 0.3, 2))
+        # Smuggle the model's prose message + backend trace through a side channel
+        # (BazaarAction has no message field; the route handler reads .sauda_message
+        # off the action when present).
+        try:
+            object.__setattr__(ba, "sauda_message", msg)
+            object.__setattr__(ba, "sauda_backend", result.get("backend", backend))
+            if result.get("error"):
+                object.__setattr__(ba, "sauda_error", result["error"])
+        except Exception:
+            pass
+        return ba
     budget = obs.own_private_budget
     ask = obs.seller_asking_price
     opp = obs.opponent_last_offer or ask
     strategy: str = "smart"
     seed: Optional[int] = None
     opening_price: float = 60.0
+    item_name: Optional[str] = None
+    listing_price: Optional[float] = None  # if user picked a real listing, this is its MRP
 @app.post("/seller-mode/reset")
+async def seller_mode_reset(req: SellerModeResetRequest, request: Request):
     """Start a seller-mode session. User plays as seller, AI plays as buyer."""
     if req.task not in TASKS:
         raise HTTPException(status_code=400, detail=f"Unknown task: {req.task}")
     task = copy.deepcopy(TASKS[req.task])
+    # Tasks have hardcoded buyer_budget / seller_cost from synthetic examples.
+    # When the user opens at a real-listing price ($2695 for an iPhone, $399
+    # for a sofa, etc.) those numbers become nonsense and Sauda offers $30 on
+    # a $2695 ask. For reference, the task prior in single_deal is
+    # buyer_budget = 1.67×ask (60 → 100), and the relative ratios
+    # (cost / budget ≈ 0.35, ask / budget ≈ 0.6) hold across tasks.
+    # Instead of those fixed numbers, derive budget/cost from the user's actual
+    # opening_price so the buyer's model of the deal scales with the listing.
+    # NOTE: the multipliers applied below (budget = 1.05×ask, cost = 0.35×ask)
+    # are not the task-prior ratios quoted above.
+    if req.opening_price and req.opening_price > 0:
+        scaled_budget = float(req.opening_price) * 1.05   # buyer can stretch ~5% above ask
+        scaled_cost = float(req.opening_price) * 0.35     # seller's true cost ~35% of ask
+    else:
+        scaled_budget = task.buyer_budget
+        scaled_cost = task.seller_cost
     # Store seller mode state
     import random
     session = {
         "rng": random.Random(req.seed),
         "round": 0,
         "max_rounds": task.max_steps if task.total_episodes == 1 else task.max_steps // task.total_episodes,
+        "buyer_budget": scaled_budget,
+        "seller_cost": scaled_cost,
         "current_seller_price": req.opening_price,
         "last_buyer_offer": None,
         "history": [],
         current_round=0,
         max_rounds=session["max_rounds"],
         opponent_last_offer=req.opening_price,
+        own_private_budget=scaled_budget,
         rounds_remaining=session["max_rounds"],
         seller_asking_price=req.opening_price,
+        item_name=req.item_name or "handwoven silk scarf",
         message=f"You open at {req.opening_price:.0f} rupees.",
     )
     # AI buyer makes first offer
+    client_ip = _client_ip(request)
+    action = _ai_buyer_action(obs, req.strategy, session["rng"], client_ip=client_ip)
     session["round"] = 1
     session["last_buyer_offer"] = action.price
+    sauda_msg = getattr(action, "sauda_message", None) or ""
+    sauda_backend = getattr(action, "sauda_backend", None)
+    sauda_error = getattr(action, "sauda_error", None)
     session["history"].append({
         "round": 0,
         "actor": "seller",
         "actor": "buyer",
         "action": action.action.value if hasattr(action.action, 'value') else action.action,
         "price": action.price,
+        "message": sauda_msg,
     })
+    fallback_msg = (
         f"Buyer offers {action.price:.0f} rupees."
         if action.action in ("offer", "OFFER", ActionType.OFFER)
         else f"Buyer {action.action}s."
         "round": 1,
         "buyer_action": action.action.value if hasattr(action.action, 'value') else action.action,
         "buyer_price": action.price,
+        "message": sauda_msg or fallback_msg,
+        "buyer_message": sauda_msg,
         "your_opening": req.opening_price,
         "history": session["history"],
         "done": False,
 @app.post("/seller-mode/step")
+async def seller_mode_step(req: SellerModeStepRequest, request: Request):
     """User (as seller) sets counteroffer price. AI buyer responds."""
     if "seller_mode" not in _envs:
         raise HTTPException(status_code=400, detail="No seller-mode session. Call /seller-mode/reset first.")
         }
     # AI buyer responds
+    client_ip = _client_ip(request)
+    action = _ai_buyer_action(obs, session["strategy"], session["rng"], client_ip=client_ip)
     if action.action in ("accept", ActionType.ACCEPT):
         session["done"] = True
         max_surplus = session["buyer_budget"] - session["seller_cost"]
         buyer_score = max(0, surplus / max_surplus) if max_surplus > 0 else 0
+        sauda_msg = getattr(action, "sauda_message", None) or ""
+        sauda_backend = getattr(action, "sauda_backend", None)
         session["history"].append({
             "round": rnd,
             "actor": "buyer",
             "action": "accept",
             "price": seller_price,
+            "message": sauda_msg,
         })
         return {
             "round": rnd,
+            "message": sauda_msg or f"Buyer accepts your price of {seller_price:.0f}! Deal closed.",
+            "buyer_message": sauda_msg,
             "buyer_action": "accept",
             "buyer_price": seller_price,
             "done": True,
     elif action.action in ("walk", ActionType.WALK):
         session["done"] = True
         session["outcome"] = "walk"
+        sauda_msg = getattr(action, "sauda_message", None) or ""
+        sauda_backend = getattr(action, "sauda_backend", None)
         session["history"].append({
             "round": rnd,
             "actor": "buyer",
             "action": "walk",
             "price": None,
+            "message": sauda_msg,
         })
         return {
             "round": rnd,
+            "message": sauda_msg or "Buyer walks away! No deal.",
+            "buyer_message": sauda_msg,
             "buyer_action": "walk",
             "buyer_price": None,
             "done": True,
     else:  # offer
         session["last_buyer_offer"] = action.price
+        sauda_msg = getattr(action, "sauda_message", None) or ""
+        sauda_backend = getattr(action, "sauda_backend", None)
+        sauda_error = getattr(action, "sauda_error", None)
         session["history"].append({
             "round": rnd,
             "actor": "buyer",
             "action": "offer",
             "price": action.price,
+            "message": sauda_msg,
         })
         return {
             "round": rnd,
+            "message": sauda_msg or f"Buyer counters with {action.price:.0f} rupees.",
+            "buyer_message": sauda_msg,
             "buyer_action": "offer",
             "buyer_price": action.price,
             "done": False,

server/safety.py ADDED Viewed

	@@ -0,0 +1,236 @@

+"""Cost & abuse defenses for the live Sauda HF Inference Endpoint.
+The /sell page is exposed to the public during the demo window. A bot loop on
+/seller-mode/step would burn HF tokens unbounded. This module gates every HF
+call behind:
+  1. Hard daily call cap (HF only — ollama/rule are unmetered locally).
+  2. Per-IP sliding-window rate limit.
+  3. Global concurrent-in-flight cap.
+  4. Circuit breaker: if HF errors N times in a row, lock to fallback for K min.
+  5. Prompt-size cap (anti-prompt-injection ballooning).
+When a gate trips, we silently downgrade to the next backend (ollama → rule).
+We never tell the user "you've been rate limited" — the UI just sees a slightly
+slower or simpler buyer. The internals are surfaced via /sauda/health for ops.
+Counters persist to disk (`runs/safety_state.json`) so a restart doesn't reset
+the daily cap and let an attacker get a fresh budget.
+All gates default to permissive numbers tuned for "live demo, ~50 humans poking
+at it for an hour"; tighten via env-vars for production.
+"""
+from __future__ import annotations
+import json
+import os
+import threading
+import time
+from collections import deque
+from pathlib import Path
+from typing import Any, Optional
+# ── Tunables ──────────────────────────────────────────────────────────
+# NOTE: every int(...) below runs at import time, so a non-integer value in any
+# of these env-vars raises ValueError and prevents the module from loading.
+# Hard cap on total HF calls per UTC day. Trip → flip to ollama for the rest of
+# the day. Default 1500 ≈ ~$3-5 of a10g-small at typical token sizes.
+MAX_HF_CALLS_PER_DAY = int(os.environ.get("SAUDA_HF_MAX_CALLS_PER_DAY", "1500"))
+# Per-IP sliding-window. (window_seconds, max_calls) tuples.
+IP_LIMITS: list[tuple[int, int]] = [
+    (60,    int(os.environ.get("SAUDA_RL_PER_MIN", "30"))),
+    (3600,  int(os.environ.get("SAUDA_RL_PER_HOUR", "200"))),
+    (86400, int(os.environ.get("SAUDA_RL_PER_DAY",  "500"))),
+]
+# Max concurrent in-flight HF calls. Excess gets ollama immediately.
+MAX_CONCURRENT_HF = int(os.environ.get("SAUDA_MAX_CONCURRENT_HF", "4"))
+# Circuit breaker: trip after N consecutive HF errors, stay tripped for K seconds.
+CB_ERROR_THRESHOLD = int(os.environ.get("SAUDA_CB_ERRORS", "3"))
+CB_COOLDOWN_SEC    = int(os.environ.get("SAUDA_CB_COOLDOWN", "300"))
+# Reject prompts longer than this many chars (anti-injection ballooning).
+MAX_PROMPT_CHARS = int(os.environ.get("SAUDA_MAX_PROMPT_CHARS", "4000"))
+# JSON file the daily counter is persisted to; override with SAUDA_SAFETY_STATE.
+STATE_FILE = Path(os.environ.get("SAUDA_SAFETY_STATE", "runs/safety_state.json"))
+# ── Internal state ────────────────────────────────────────────────────
+# Single process-wide lock; the public functions below take it before reading
+# or mutating any of the counters in this section.
+_lock = threading.Lock()
+# IP → deque[float timestamps]
+_ip_calls: dict[str, deque[float]] = {}
+# Global concurrency counter.
+_inflight = 0
+# Circuit breaker state.
+_consecutive_errors = 0
+# Epoch seconds until which the breaker stays open; 0.0 means closed.
+_cb_open_until: float = 0.0
+# Daily counter: { "utc_date": "YYYY-MM-DD", "calls": int }
+_daily = {"utc_date": "", "calls": 0}
+# Total spend trace for ops (resets on restart, not safety-critical).
+_lifetime = {"hf_calls": 0, "hf_errors": 0, "ollama_calls": 0, "rule_calls": 0,
+             "blocked_daily": 0, "blocked_ip": 0, "blocked_concurrency": 0,
+             "blocked_circuit": 0, "blocked_prompt": 0}
+def _today() -> str:
+    """Return the current UTC calendar date formatted as YYYY-MM-DD."""
+    utc_now = time.gmtime()
+    return time.strftime("%Y-%m-%d", utc_now)
+def _load_state() -> None:
+    """Restore the persisted daily counter if it was written today (UTC).
+    Best-effort: a missing file, unreadable/invalid JSON, a non-dict payload
+    or a stale date all leave the in-memory counter untouched.
+    """
+    global _daily
+    if not STATE_FILE.exists():
+        return
+    try:
+        saved = json.loads(STATE_FILE.read_text())
+        if not isinstance(saved, dict):
+            return
+        if saved.get("utc_date") != _today():
+            # Counter belongs to a previous day; start fresh.
+            return
+        _daily = {"utc_date": saved["utc_date"], "calls": int(saved.get("calls", 0))}
+    except Exception:
+        return
+def _persist_state() -> None:
+    """Write the daily counter to STATE_FILE atomically (best-effort).
+    The payload goes to a sibling temp file first and is then moved over the
+    real file with os.replace(), so a crash mid-write cannot leave truncated
+    JSON behind. A truncated file would be silently ignored by the loader and
+    hand out a fresh daily budget after restart.
+    Failures are swallowed on purpose: losing a persistence write must never
+    break a live request.
+    """
+    try:
+        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
+        tmp_path = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
+        tmp_path.write_text(json.dumps(_daily))
+        os.replace(tmp_path, STATE_FILE)
+    except Exception:
+        pass
+# Restore today's persisted counter at import time (no-op when the state file
+# is missing, unreadable, or from a previous UTC day).
+_load_state()
+# ── Public API ────────────────────────────────────────────────────────
+class HFCallDenied(Exception):
+    """Signal that a safety gate refused an HF call; the caller should fall back.
+    Attributes:
+        reason: human-readable explanation (also the exception message).
+        gate: short identifier of the gate that tripped.
+    """
+    def __init__(self, reason: str, gate: str):
+        self.gate = gate
+        self.reason = reason
+        super().__init__(reason)
+def check_prompt_size(text: str) -> None:
+    """Refuse prompts longer than MAX_PROMPT_CHARS characters.
+    Raises:
+        HFCallDenied: with gate "prompt_size" when the prompt is over the cap;
+            the blocked_prompt counter is bumped first.
+    """
+    size = len(text)
+    if size <= MAX_PROMPT_CHARS:
+        return
+    with _lock:
+        _lifetime["blocked_prompt"] += 1
+    raise HFCallDenied(
+        f"prompt {size} chars > cap {MAX_PROMPT_CHARS}",
+        gate="prompt_size",
+    )
+def acquire_hf_slot(client_ip: Optional[str] = None) -> None:
+    """Run every safety gate and, if all pass, reserve one in-flight HF slot.
+    Gates are evaluated in order under the module lock: UTC-day rollover, daily
+    cap, circuit breaker, concurrency cap, then the per-IP sliding windows
+    (skipped when `client_ip` is falsy). Only a fully admitted call is counted
+    against the daily total and recorded in the caller's IP history.
+    Caller MUST call `release_hf_slot(success=...)` after the call (in finally).
+    Args:
+        client_ip: rate-limit key for the caller; None/"" bypasses the per-IP gate.
+    Raises:
+        HFCallDenied: for the first gate that trips; `.gate` names it.
+    """
+    global _inflight
+    now = time.time()
+    today = _today()
+    with _lock:
+        # 1) Roll over daily counter at UTC midnight.
+        if _daily["utc_date"] != today:
+            _daily["utc_date"] = today
+            _daily["calls"] = 0
+            _persist_state()
+        # 2) Daily hard cap.
+        if _daily["calls"] >= MAX_HF_CALLS_PER_DAY:
+            _lifetime["blocked_daily"] += 1
+            raise HFCallDenied(
+                f"daily HF cap {MAX_HF_CALLS_PER_DAY} reached",
+                gate="daily_cap",
+            )
+        # 3) Circuit breaker.
+        if now < _cb_open_until:
+            _lifetime["blocked_circuit"] += 1
+            raise HFCallDenied(
+                f"circuit breaker open for {int(_cb_open_until - now)}s more",
+                gate="circuit_breaker",
+            )
+        # 4) Concurrency.
+        if _inflight >= MAX_CONCURRENT_HF:
+            _lifetime["blocked_concurrency"] += 1
+            raise HFCallDenied(
+                f"concurrent in-flight cap {MAX_CONCURRENT_HF} reached",
+                gate="concurrency",
+            )
+        # 5) Per-IP sliding windows.
+        if client_ip:
+            dq = _ip_calls.setdefault(client_ip, deque())
+            # Evict only what is older than the LONGEST window. Evicting per
+            # window (shortest first) would throw away the history the hour
+            # and day windows need, so those limits could never trip.
+            longest_window = max((w for w, _ in IP_LIMITS), default=0)
+            horizon = now - longest_window
+            while dq and dq[0] < horizon:
+                dq.popleft()
+            for window_s, max_calls in IP_LIMITS:
+                cutoff = now - window_s
+                count_in_window = sum(1 for t in dq if t >= cutoff)
+                if count_in_window >= max_calls:
+                    _lifetime["blocked_ip"] += 1
+                    raise HFCallDenied(
+                        f"ip {client_ip} hit {max_calls}/{window_s}s",
+                        gate=f"ip_rate_{window_s}s",
+                    )
+            # Record the call only after every window admitted it.
+            dq.append(now)
+        # All gates passed — reserve.
+        _inflight += 1
+        _daily["calls"] += 1
+        _lifetime["hf_calls"] += 1
+        # Persist every 10 calls to keep disk writes cheap but bounded.
+        if _daily["calls"] % 10 == 0:
+            _persist_state()
+def release_hf_slot(success: bool) -> None:
+    """Give back an in-flight HF slot and feed the outcome to the circuit breaker.
+    A success clears the consecutive-error streak. A failure extends it and,
+    once the streak reaches CB_ERROR_THRESHOLD, (re)opens the breaker for
+    CB_COOLDOWN_SEC seconds from now.
+    """
+    global _inflight, _consecutive_errors, _cb_open_until
+    with _lock:
+        # Never let the counter go negative, even on an unmatched release.
+        _inflight = _inflight - 1 if _inflight > 0 else 0
+        if success:
+            _consecutive_errors = 0
+            return
+        _consecutive_errors += 1
+        _lifetime["hf_errors"] += 1
+        if _consecutive_errors >= CB_ERROR_THRESHOLD:
+            _cb_open_until = time.time() + CB_COOLDOWN_SEC
+def note_fallback(kind: str) -> None:
+    """Count one call served by a non-HF backend (reported via /sauda/health).
+    Only "ollama" and "rule" are tracked; any other `kind` is ignored.
+    """
+    counter_for = {"ollama": "ollama_calls", "rule": "rule_calls"}
+    with _lock:
+        counter = counter_for.get(kind)
+        if counter is not None:
+            _lifetime[counter] += 1
+def stats() -> dict[str, Any]:
+    """Return a point-in-time copy of the safety counters for /sauda/health (ops only).
+    The snapshot is taken under the module lock; the dict-valued entries are
+    shallow copies, so callers cannot mutate live state through them.
+    """
+    with _lock:
+        now = time.time()
+        breaker_open = now < _cb_open_until
+        breaker_remaining = max(0, int(_cb_open_until - now))
+        window_rules = [{"window_s": w, "max_calls": n} for w, n in IP_LIMITS]
+        snapshot: dict[str, Any] = {
+            "daily": dict(_daily),
+            "daily_cap": MAX_HF_CALLS_PER_DAY,
+            "inflight": _inflight,
+            "concurrency_cap": MAX_CONCURRENT_HF,
+            "circuit_breaker_open": breaker_open,
+            "circuit_breaker_open_for_s": breaker_remaining,
+            "consecutive_errors": _consecutive_errors,
+            "lifetime": dict(_lifetime),
+            "ip_limits": window_rules,
+            "tracked_ips": len(_ip_calls),
+        }
+        return snapshot

server/sauda_buyer.py ADDED Viewed

	@@ -0,0 +1,289 @@

+"""Live Sauda buyer endpoints for the /sell page.
+Two backends:
+  - "hf"      → POST to a Hugging Face Inference Endpoint (production)
+  - "ollama"  → POST to a local ollama server (fallback / dev)
+Configuration via env-vars:
+  SAUDA_BACKEND        — "hf" (default), "ollama", or "rule" (skip LLM)
+  SAUDA_HF_URL         — full HF Inference Endpoint URL, e.g.
+                         "https://abc123.us-east-1.aws.endpoints.huggingface.cloud"
+  SAUDA_HF_TOKEN       — HF token with read access to the endpoint
+  SAUDA_OLLAMA_URL     — ollama base URL (default http://localhost:11434)
+  SAUDA_OLLAMA_MODEL   — ollama tag (default "bestdealbot")
+Both paths render the buyer's observation through the same prompt the eval
+harness uses (DEFAULT_SYSTEM_PROMPT + format_observation), parse the action
+via parse_action, and apply the same Bayesian seller-tell steering as the
+v2 evaluation runs. Result: the /sell page sees the exact same buyer the
+research numbers are based on, just exposed over HTTP instead of in-process.
+"""
+from __future__ import annotations
+import json
+import os
+import re
+from typing import Any, Optional
+import requests
+from bazaarbot_env import (
+    DEFAULT_SYSTEM_PROMPT,
+    format_observation,
+    parse_action,
+    steer_bayesian_action,
+)
+from .safety import (
+    HFCallDenied,
+    acquire_hf_slot,
+    check_prompt_size,
+    note_fallback,
+    release_hf_slot,
+)
+# ── Helpers ─────────────────────────────────────────────────────────
+def _build_prompt(obs_dict: dict[str, Any]) -> tuple[str, str]:
+    """Build the (system, user) message pair for chat-style backends.
+    The system message is DEFAULT_SYSTEM_PROMPT; the user message is the
+    observation rendered by format_observation().
+    """
+    system_msg = DEFAULT_SYSTEM_PROMPT
+    user_msg = format_observation(obs_dict)
+    return system_msg, user_msg
+def _post_json(url: str, payload: dict[str, Any], headers: dict[str, str], timeout: int = 30) -> dict:
+    """POST `payload` as JSON to `url` and return the decoded JSON response.
+    Raises whatever `requests` raises for transport failures, plus the
+    HTTP-status error from raise_for_status() on a non-success response.
+    """
+    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
+    # Surface 4xx/5xx as exceptions before attempting to decode the body.
+    response.raise_for_status()
+    decoded = response.json()
+    return decoded
+# ── HF Inference Endpoint backend ────────────────────────────────────
+def _hf_chat(system: str, user: str, *, max_new_tokens: int = 96, temperature: float = 0.6) -> str:
+    """POST to a HF Inference Endpoint serving a text-generation model.
+    Endpoints accept either OpenAI-compatible chat completions OR HF native
+    text-generation payloads depending on how they're deployed. We send the
+    OpenAI-compatible shape first since modern HF endpoints support it, and
+    fall back to the native shape when that route answers with an HTTP error
+    or an unexpected body.
+    Args:
+        system: system prompt text.
+        user: user-turn text.
+        max_new_tokens: generation length cap forwarded to the endpoint.
+        temperature: sampling temperature forwarded to the endpoint.
+    Returns:
+        The generated text.
+    Raises:
+        RuntimeError: missing SAUDA_HF_URL / token, or an unrecognised native
+            response shape.
+        requests.exceptions.RequestException: transport or HTTP failure.
+    """
+    url = os.environ.get("SAUDA_HF_URL", "").rstrip("/")
+    token = os.environ.get("SAUDA_HF_TOKEN") or os.environ.get("HF_TOKEN")
+    if not url or not token:
+        raise RuntimeError("SAUDA_HF_URL and SAUDA_HF_TOKEN must be set")
+    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+    # Try OpenAI-compatible chat completions endpoint first
+    chat_url = url + "/v1/chat/completions"
+    chat_payload = {
+        "messages": [
+            {"role": "system", "content": system},
+            {"role": "user", "content": user},
+        ],
+        "max_tokens": max_new_tokens,
+        "temperature": temperature,
+        "top_p": 0.9,
+    }
+    try:
+        data = _post_json(chat_url, chat_payload, headers)
+        return data["choices"][0]["message"]["content"]
+    except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
+        # The host itself is slow or unreachable. The native route lives on the
+        # same host, so retrying would only spend a second full timeout while
+        # the caller holds a concurrency slot. Fail fast instead.
+        raise
+    except Exception:
+        # HTTP error (route not deployed, payload rejected, ...) or a body that
+        # is not chat-completions shaped: try the native payload next.
+        pass
+    # Fall back to HF native text-generation
+    payload = {
+        "inputs": f"{system}\n\n{user}\n",
+        "parameters": {
+            "max_new_tokens": max_new_tokens,
+            "temperature": temperature,
+            "top_p": 0.9,
+            "return_full_text": False,
+        },
+    }
+    data = _post_json(url, payload, headers)
+    if isinstance(data, list) and data and "generated_text" in data[0]:
+        return data[0]["generated_text"]
+    if isinstance(data, dict) and "generated_text" in data:
+        return data["generated_text"]
+    raise RuntimeError(f"Unexpected HF endpoint response shape: {str(data)[:200]}")
+# ── Ollama backend ───────────────────────────────────────────────────
+def _ollama_chat(system: str, user: str, *, max_new_tokens: int = 96, temperature: float = 0.6) -> str:
+    """Send one non-streaming chat request to an ollama server and return its reply text.
+    The base URL comes from SAUDA_OLLAMA_URL (default http://localhost:11434)
+    and the model tag from SAUDA_OLLAMA_MODEL (default "bestdealbot").
+    Returns an empty string when the response has no message content.
+    """
+    base_url = os.environ.get("SAUDA_OLLAMA_URL", "http://localhost:11434").rstrip("/")
+    model_tag = os.environ.get("SAUDA_OLLAMA_MODEL", "bestdealbot")
+    conversation = [
+        {"role": "system", "content": system},
+        {"role": "user", "content": user},
+    ]
+    sampling = {
+        "temperature": temperature,
+        "top_p": 0.9,
+        "num_predict": max_new_tokens,
+    }
+    body = {
+        "model": model_tag,
+        "messages": conversation,
+        "stream": False,
+        "options": sampling,
+    }
+    # Local generation can be slow, hence the longer 60 s timeout.
+    reply = _post_json(f"{base_url}/api/chat", body, {}, timeout=60)
+    message = reply.get("message", {})
+    return message.get("content", "")
+# ── Public entrypoint ────────────────────────────────────────────────
+def sauda_action(
+    obs_dict: dict[str, Any],
+    *,
+    backend: Optional[str] = None,
+    use_steering: bool = True,
+    client_ip: Optional[str] = None,
+) -> dict[str, Any]:
+    """Get a buyer action from Sauda v2.
+    Returns dict with keys: action ("offer"|"accept"|"walk"), price (float|None),
+    message (str), backend (str echoing which path served), error (str if any).
+    Never raises — falls back through HF → ollama → rule. The /sell page is
+    interactive and a 500 mid-demo is worse than a dumb fallback.
+    Safety gates (rate/spend/concurrency caps) wrap the HF path; if any trips,
+    we silently downgrade to ollama and don't tell the user. `client_ip` is
+    used for per-IP rate-limiting; pass None for trusted server-internal calls.
+    """
+    chosen = (backend or os.environ.get("SAUDA_BACKEND") or "hf").lower()
+    system, user = _build_prompt(obs_dict)
+    text = ""
+    err: Optional[str] = None
+    served_by = chosen
+    def _try_hf() -> str:
+        """HF path with safety gates. Raises on any failure (caller falls back)."""
+        check_prompt_size(system + user)
+        acquire_hf_slot(client_ip=client_ip)
+        ok = False
+        try:
+            out = _hf_chat(system, user)
+            ok = True
+            return out
+        finally:
+            release_hf_slot(success=ok)
+    def _try_ollama() -> str:
+        out = _ollama_chat(system, user)
+        note_fallback("ollama")
+        return out
+    try:
+        if chosen == "hf":
+            text = _try_hf()
+        elif chosen == "ollama":
+            text = _try_ollama()
+        elif chosen == "rule":
+            note_fallback("rule")
+            text = ""  # forces fallback path below
+        else:
+            raise RuntimeError(f"unknown SAUDA_BACKEND: {chosen}")
+    except HFCallDenied as e:
+        # Safety gate tripped. Silently downgrade to ollama; if that fails too,
+        # the rule-based fallback below kicks in.
+        err = f"hf gated ({e.gate}); using ollama"
+        served_by = "ollama"
+        try:
+            text = _try_ollama()
+        except Exception as e2:
+            err = f"hf gated ({e.gate}); ollama also failed: {type(e2).__name__}"
+            served_by = "rule"
+            note_fallback("rule")
+    except Exception as e:
+        err = f"{chosen} backend failed: {type(e).__name__}: {str(e)[:160]}"
+        served_by = "ollama" if chosen == "hf" else f"{chosen}+fallback"
+        # If primary was HF, try ollama before giving up.
+        if chosen == "hf":
+            try:
+                text = _try_ollama()
+            except Exception as e2:
+                err = f"hf failed; ollama also failed: {type(e2).__name__}"
+                served_by = "rule"
+                note_fallback("rule")
+    fallback_price = float(obs_dict.get("own_private_budget") or 100) * 0.3
+    if text:
+        action = parse_action(text, fallback_price=fallback_price)
+        action.pop("_parse_error", None)
+    else:
+        # Conservative rule-based fallback: open at 35% of ask, escalate by round.
+        ask = float(obs_dict.get("seller_asking_price") or obs_dict.get("opponent_last_offer") or 100)
+        rnd = int(obs_dict.get("current_round") or 0)
+        last = obs_dict.get("own_last_offer")
+        if last is None:
+            price = round(ask * 0.35, 2)
+        else:
+            price = round(float(last) + (ask - float(last)) * 0.25, 2)
+        action = {"action": "offer", "price": price, "message": ""}
+    if use_steering:
+        try:
+            action = steer_bayesian_action(obs_dict, action)
+        except Exception:
+            pass
+    out: dict[str, Any] = {
+        "action": str(action.get("action", "offer")),
+        "price": action.get("price"),
+        "message": action.get("message") or "",
+        "backend": served_by,
+    }
+    if err:
+        out["error"] = err
+    return out
+def health() -> dict[str, Any]:
+    """Quick reachability probe for both backends. Used by /sauda/health."""
+    out: dict[str, Any] = {
+        "active_backend": (os.environ.get("SAUDA_BACKEND") or "hf").lower(),
+        "hf_configured": bool(os.environ.get("SAUDA_HF_URL")) and bool(
+            os.environ.get("SAUDA_HF_TOKEN") or os.environ.get("HF_TOKEN")
+        ),
+        "ollama_url": os.environ.get("SAUDA_OLLAMA_URL", "http://localhost:11434"),
+        "ollama_model": os.environ.get("SAUDA_OLLAMA_MODEL", "bestdealbot"),
+    }
+    # Probe HF (skip if not configured)
+    if out["hf_configured"]:
+        try:
+            url = os.environ["SAUDA_HF_URL"].rstrip("/")
+            token = os.environ.get("SAUDA_HF_TOKEN") or os.environ["HF_TOKEN"]
+            r = requests.get(url + "/health", headers={"Authorization": f"Bearer {token}"}, timeout=5)
+            out["hf_ok"] = r.status_code < 500
+            out["hf_status"] = r.status_code
+        except Exception as e:
+            out["hf_ok"] = False
+            out["hf_error"] = f"{type(e).__name__}: {str(e)[:120]}"
+    # Probe Ollama
+    try:
+        host = out["ollama_url"]
+        r = requests.get(f"{host}/api/tags", timeout=3)
+        out["ollama_ok"] = r.status_code == 200
+        if r.status_code == 200:
+            tags = [m.get("name", "") for m in r.json().get("models", [])]
+            out["ollama_has_model"] = out["ollama_model"] in tags or any(
+                t.startswith(out["ollama_model"]) for t in tags
+            )
+    except Exception as e:
+        out["ollama_ok"] = False
+        out["ollama_error"] = f"{type(e).__name__}: {str(e)[:120]}"
+    # Safety / spend stats (ops use only — don't expose details to UI).
+    try:
+        from .safety import stats as _safety_stats
+        out["safety"] = _safety_stats()
+    except Exception:
+        pass
+    return out