Spaces:
Running
Sync server, bazaarbot_env, and nlp modules from main
Browse files
Brings in 25+ commits of server-side improvements that have been
landing in the GitHub repo since the Space last synced on Apr 16:
- Live HF Inference Endpoint backend (server/sauda_buyer.py) with
Ollama fallback, dual-backend health probe.
- Safety module: per-IP rate limit, daily cap, concurrency cap,
circuit breaker, prompt-size cap (server/safety.py).
- /highlight endpoint for span-level seller-tell extraction
(nlp/keyword_patterns.py with English numeric deception cues
added today).
- LLMSeller persona implementation (bazaarbot_env/llm_seller.py)
using Gemma-4-E4B; auto-accept-when-offer >= reservation;
monotonic counter logic.
- gym_wrapper steering with monotonicity guard fix from today
(max(own_last_offer, ...) shape so ceiling regression can't drag
the buyer backward).
Updates Dockerfile to COPY bazaarbot_env/ and nlp/ alongside server/.
- Dockerfile +2 -0
- bazaarbot_env/__init__.py +72 -0
- bazaarbot_env/environment.py +543 -0
- bazaarbot_env/gym_wrapper.py +539 -0
- bazaarbot_env/listings.py +118 -0
- bazaarbot_env/llm_seller.py +453 -0
- bazaarbot_env/models.py +246 -0
- bazaarbot_env/seller.py +437 -0
- bazaarbot_env/tasks.py +336 -0
- nlp/__init__.py +3 -0
- nlp/eval_extractor.py +162 -0
- nlp/extractor.py +326 -0
- nlp/fetch_datasets.py +361 -0
- nlp/keyword_patterns.py +149 -0
- nlp/setup_ministral.sh +53 -0
- nlp/templates.py +185 -0
- server/main.py +195 -16
- server/safety.py +236 -0
- server/sauda_buyer.py +289 -0
|
@@ -6,6 +6,8 @@ COPY requirements.txt .
|
|
| 6 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
|
| 8 |
COPY server/ ./server/
|
|
|
|
|
|
|
| 9 |
COPY openenv.yaml .
|
| 10 |
|
| 11 |
RUN mkdir -p /app/data
|
|
|
|
| 6 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
|
| 8 |
COPY server/ ./server/
|
| 9 |
+
COPY bazaarbot_env/ ./bazaarbot_env/
|
| 10 |
+
COPY nlp/ ./nlp/
|
| 11 |
COPY openenv.yaml .
|
| 12 |
|
| 13 |
RUN mkdir -p /app/data
|
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Standalone, training-ready BazaarBot negotiation environment.
|
| 2 |
+
|
| 3 |
+
This package is a thin re-export of the core env (`models`, `seller`,
|
| 4 |
+
`environment`, `tasks`) plus a training-oriented wrapper:
|
| 5 |
+
|
| 6 |
+
from bazaarbot_env import BazaarGymEnv, rollout_episode
|
| 7 |
+
|
| 8 |
+
It is importable without FastAPI, uvicorn, or any of the serving stack —
|
| 9 |
+
designed to vendor cleanly into a Kaggle notebook or standalone training job.
|
| 10 |
+
|
| 11 |
+
Usage:
|
| 12 |
+
env = BazaarGymEnv(task_name="single_deal", seed=42)
|
| 13 |
+
obs, _ = env.reset()
|
| 14 |
+
while not env.done:
|
| 15 |
+
action = policy(obs) # policy returns dict: {"action": ..., "price": ...}
|
| 16 |
+
obs, reward, done, info = env.step(action)
|
| 17 |
+
|
| 18 |
+
For GRPO-style training over multiple rollouts, use `rollout_episode`.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
from .models import (
|
| 22 |
+
ActionType,
|
| 23 |
+
BazaarAction,
|
| 24 |
+
BazaarObservation,
|
| 25 |
+
BazaarReward,
|
| 26 |
+
CareerHistory,
|
| 27 |
+
DealOutcome,
|
| 28 |
+
DealRecord,
|
| 29 |
+
EnvironmentState,
|
| 30 |
+
SellerPersonalityType,
|
| 31 |
+
TaskConfig,
|
| 32 |
+
TellObservation,
|
| 33 |
+
)
|
| 34 |
+
from .environment import BazaarEnvironment
|
| 35 |
+
from .seller import SellerPersonality, SellerState, SellerTell
|
| 36 |
+
from .tasks import GRADERS, TASKS
|
| 37 |
+
from .gym_wrapper import (
|
| 38 |
+
DEFAULT_SYSTEM_PROMPT,
|
| 39 |
+
BazaarGymEnv,
|
| 40 |
+
format_observation,
|
| 41 |
+
parse_action,
|
| 42 |
+
rollout_episode,
|
| 43 |
+
steer_bayesian_action,
|
| 44 |
+
strip_think_tags,
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
+
__all__ = [
|
| 48 |
+
"ActionType",
|
| 49 |
+
"BazaarAction",
|
| 50 |
+
"BazaarEnvironment",
|
| 51 |
+
"BazaarGymEnv",
|
| 52 |
+
"BazaarObservation",
|
| 53 |
+
"BazaarReward",
|
| 54 |
+
"CareerHistory",
|
| 55 |
+
"DealOutcome",
|
| 56 |
+
"DealRecord",
|
| 57 |
+
"DEFAULT_SYSTEM_PROMPT",
|
| 58 |
+
"EnvironmentState",
|
| 59 |
+
"GRADERS",
|
| 60 |
+
"SellerPersonality",
|
| 61 |
+
"SellerPersonalityType",
|
| 62 |
+
"SellerState",
|
| 63 |
+
"SellerTell",
|
| 64 |
+
"TASKS",
|
| 65 |
+
"TaskConfig",
|
| 66 |
+
"TellObservation",
|
| 67 |
+
"format_observation",
|
| 68 |
+
"parse_action",
|
| 69 |
+
"rollout_episode",
|
| 70 |
+
"steer_bayesian_action",
|
| 71 |
+
"strip_think_tags",
|
| 72 |
+
]
|
|
@@ -0,0 +1,543 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Core BazaarBot negotiation environment."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import copy
|
| 6 |
+
import math
|
| 7 |
+
import random
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
from .models import (
|
| 11 |
+
ActionType,
|
| 12 |
+
BazaarAction,
|
| 13 |
+
BazaarObservation,
|
| 14 |
+
BazaarReward,
|
| 15 |
+
CareerHistory,
|
| 16 |
+
DealOutcome,
|
| 17 |
+
DealRecord,
|
| 18 |
+
EnvironmentState,
|
| 19 |
+
SellerPersonalityType,
|
| 20 |
+
TaskConfig,
|
| 21 |
+
TellObservation,
|
| 22 |
+
)
|
| 23 |
+
from .seller import SellerPersonality, SellerState, SellerTell
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _tell_to_model(
|
| 27 |
+
tell: SellerTell | None,
|
| 28 |
+
message: str = "",
|
| 29 |
+
history: list[str] | None = None,
|
| 30 |
+
use_nlp: bool = True,
|
| 31 |
+
) -> TellObservation | None:
|
| 32 |
+
if tell is None:
|
| 33 |
+
return None
|
| 34 |
+
|
| 35 |
+
# NLP layer: extract language-based signals from the seller utterance.
|
| 36 |
+
# Rule-based body-language tells (fidget, posture, eye_contact) are kept
|
| 37 |
+
# from seller.py β NLP fills verbal and condition dimensions.
|
| 38 |
+
nlp_verbal: dict = {}
|
| 39 |
+
nlp_condition: dict = {}
|
| 40 |
+
if use_nlp and message:
|
| 41 |
+
try:
|
| 42 |
+
from nlp.extractor import TellExtractor
|
| 43 |
+
_extractor = TellExtractor()
|
| 44 |
+
extracted = _extractor.extract(message, history=history, fast=False)
|
| 45 |
+
nlp_verbal = {
|
| 46 |
+
"verbal_urgency": extracted["verbal_urgency"],
|
| 47 |
+
"verbal_confidence": extracted["verbal_confidence"],
|
| 48 |
+
"verbal_deception_cue": extracted["verbal_deception_cue"],
|
| 49 |
+
"emotional_escalation": extracted["emotional_escalation"],
|
| 50 |
+
"offer_speed": extracted["offer_speed"],
|
| 51 |
+
"concession_pattern": extracted["concession_pattern"],
|
| 52 |
+
}
|
| 53 |
+
nlp_condition = {
|
| 54 |
+
"condition_score": extracted["condition_score"],
|
| 55 |
+
"depreciation_score": extracted["depreciation_score"],
|
| 56 |
+
"condition_label": extracted["condition_label"],
|
| 57 |
+
}
|
| 58 |
+
except Exception:
|
| 59 |
+
pass # extractor unavailable or Ollama down β fall back to rule-based
|
| 60 |
+
|
| 61 |
+
# Blend: NLP verbal signals averaged with rule-based where both exist.
|
| 62 |
+
# Rule-based is ground truth for non-verbal (fidget, posture, eye_contact).
|
| 63 |
+
# NLP takes precedence for condition since rule code has no condition signal.
|
| 64 |
+
def _blend(rule_val: float, nlp_val: float | None, nlp_weight: float = 0.55) -> float:
|
| 65 |
+
if nlp_val is None:
|
| 66 |
+
return rule_val
|
| 67 |
+
return round(rule_val * (1 - nlp_weight) + nlp_val * nlp_weight, 3)
|
| 68 |
+
|
| 69 |
+
return TellObservation(
|
| 70 |
+
verbal_urgency=_blend(tell.verbal_urgency, nlp_verbal.get("verbal_urgency")),
|
| 71 |
+
verbal_confidence=_blend(tell.verbal_confidence, nlp_verbal.get("verbal_confidence")),
|
| 72 |
+
verbal_deception_cue=_blend(tell.verbal_deception_cue, nlp_verbal.get("verbal_deception_cue")),
|
| 73 |
+
price_rounding=tell.price_rounding,
|
| 74 |
+
offer_speed=nlp_verbal.get("offer_speed", tell.offer_speed),
|
| 75 |
+
concession_pattern=nlp_verbal.get("concession_pattern", tell.concession_pattern),
|
| 76 |
+
fidget_level=round(tell.fidget_level, 3),
|
| 77 |
+
eye_contact=tell.eye_contact,
|
| 78 |
+
posture=tell.posture,
|
| 79 |
+
repeat_phrases=tell.repeat_phrases,
|
| 80 |
+
topic_changes=tell.topic_changes,
|
| 81 |
+
emotional_escalation=_blend(tell.emotional_escalation, nlp_verbal.get("emotional_escalation")),
|
| 82 |
+
condition_score=nlp_condition.get("condition_score", 1.0),
|
| 83 |
+
depreciation_score=nlp_condition.get("depreciation_score", 0.0),
|
| 84 |
+
condition_label=nlp_condition.get("condition_label", "unknown"),
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class BazaarEnvironment:
|
| 89 |
+
"""Negotiation environment implementing step/reset/state."""
|
| 90 |
+
|
| 91 |
+
def __init__(self, task: TaskConfig, seed: Optional[int] = None):
|
| 92 |
+
self.task = task
|
| 93 |
+
self.rng = random.Random(seed)
|
| 94 |
+
if seed is not None:
|
| 95 |
+
random.seed(seed)
|
| 96 |
+
|
| 97 |
+
# Episode tracking
|
| 98 |
+
self.current_episode = 0
|
| 99 |
+
self.total_episodes = task.total_episodes
|
| 100 |
+
self.career_history = CareerHistory()
|
| 101 |
+
|
| 102 |
+
# Per-episode state
|
| 103 |
+
self.seller: Optional[SellerState] = None
|
| 104 |
+
self.current_round = 0
|
| 105 |
+
self.done = False
|
| 106 |
+
self.buyer_budget = task.buyer_budget
|
| 107 |
+
self.remaining_bankroll = task.buyer_budget * task.total_episodes
|
| 108 |
+
self.offer_history: list[dict] = []
|
| 109 |
+
self.cumulative_reward = 0.0
|
| 110 |
+
self.step_rewards: list[float] = []
|
| 111 |
+
self.tells_history: list[TellObservation] = []
|
| 112 |
+
|
| 113 |
+
# Stalling detection
|
| 114 |
+
self._repeated_offers = 0
|
| 115 |
+
self._last_buyer_offer: Optional[float] = None
|
| 116 |
+
|
| 117 |
+
# Episode results for career grading
|
| 118 |
+
self.episode_results: list[DealRecord] = []
|
| 119 |
+
|
| 120 |
+
# Snapshot for counterfactual replay
|
| 121 |
+
self._snapshots: dict[int, dict] = {}
|
| 122 |
+
|
| 123 |
+
# Items for variety
|
| 124 |
+
self._items = [
|
| 125 |
+
"handwoven silk scarf", "brass table lamp", "leather messenger bag",
|
| 126 |
+
"ceramic tea set", "sandalwood incense box", "hand-painted pottery",
|
| 127 |
+
"embroidered cushion cover", "copper water bottle", "jute tote bag",
|
| 128 |
+
"wooden chess set",
|
| 129 |
+
]
|
| 130 |
+
|
| 131 |
+
def _snapshot(self):
|
| 132 |
+
"""Save a snapshot of environment state for counterfactual replay."""
|
| 133 |
+
self._snapshots[self.current_round] = {
|
| 134 |
+
"seller": copy.deepcopy(self.seller),
|
| 135 |
+
"offer_history": copy.deepcopy(self.offer_history),
|
| 136 |
+
"done": self.done,
|
| 137 |
+
"cumulative_reward": self.cumulative_reward,
|
| 138 |
+
"step_rewards": list(self.step_rewards),
|
| 139 |
+
"repeated_offers": self._repeated_offers,
|
| 140 |
+
"last_buyer_offer": self._last_buyer_offer,
|
| 141 |
+
"current_round": self.current_round,
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
def restore_snapshot(self, round_num: int) -> bool:
|
| 145 |
+
"""Restore environment to state at given round. Returns False if no snapshot."""
|
| 146 |
+
snap = self._snapshots.get(round_num)
|
| 147 |
+
if snap is None:
|
| 148 |
+
return False
|
| 149 |
+
self.seller = copy.deepcopy(snap["seller"])
|
| 150 |
+
self.offer_history = copy.deepcopy(snap["offer_history"])
|
| 151 |
+
self.done = snap["done"]
|
| 152 |
+
self.cumulative_reward = snap["cumulative_reward"]
|
| 153 |
+
self.step_rewards = list(snap["step_rewards"])
|
| 154 |
+
self._repeated_offers = snap["repeated_offers"]
|
| 155 |
+
self._last_buyer_offer = snap["last_buyer_offer"]
|
| 156 |
+
self.current_round = snap["current_round"]
|
| 157 |
+
return True
|
| 158 |
+
|
| 159 |
+
def reset(self) -> BazaarObservation:
|
| 160 |
+
"""Reset for next episode."""
|
| 161 |
+
self.current_episode += 1
|
| 162 |
+
self.current_round = 0
|
| 163 |
+
self.done = False
|
| 164 |
+
self.offer_history = []
|
| 165 |
+
self.step_rewards = []
|
| 166 |
+
self.tells_history = []
|
| 167 |
+
self._repeated_offers = 0
|
| 168 |
+
self._last_buyer_offer = None
|
| 169 |
+
self._snapshots = {}
|
| 170 |
+
|
| 171 |
+
# Map personality enum
|
| 172 |
+
personality = SellerPersonality(self.task.seller_personality.value)
|
| 173 |
+
|
| 174 |
+
# Per-episode listing: sample from real dataset when enabled, else
|
| 175 |
+
# fall back to the task's static cost/budget + hardcoded items list.
|
| 176 |
+
listing = None
|
| 177 |
+
if self.task.use_real_listings:
|
| 178 |
+
from .listings import sample_listing
|
| 179 |
+
listing = sample_listing(self.rng)
|
| 180 |
+
|
| 181 |
+
if listing is not None:
|
| 182 |
+
episode_cost = listing["seller_cost"]
|
| 183 |
+
episode_anchor = listing["seller_anchor"]
|
| 184 |
+
self.buyer_budget = listing["buyer_budget"]
|
| 185 |
+
item = listing["name"]
|
| 186 |
+
else:
|
| 187 |
+
episode_cost = self.task.seller_cost
|
| 188 |
+
episode_anchor = self.task.seller_cost * self.task.seller_anchor_multiplier
|
| 189 |
+
item = self._items[(self.current_episode - 1) % len(self._items)]
|
| 190 |
+
|
| 191 |
+
# Create seller for this episode
|
| 192 |
+
self.seller = SellerState(
|
| 193 |
+
cost=episode_cost,
|
| 194 |
+
anchor=episode_anchor,
|
| 195 |
+
base_concession_rate=self.task.seller_concession_rate,
|
| 196 |
+
inventory=self.task.seller_inventory,
|
| 197 |
+
initial_inventory=self.task.seller_inventory,
|
| 198 |
+
batna_probability=self.task.seller_batna_probability,
|
| 199 |
+
max_rounds=self.task.max_steps if self.task.total_episodes == 1 else self.task.max_steps // self.task.total_episodes,
|
| 200 |
+
personality=personality,
|
| 201 |
+
_rng=self.rng,
|
| 202 |
+
)
|
| 203 |
+
|
| 204 |
+
# Career mode: update seller with buyer history
|
| 205 |
+
if self.task.enable_career and self.career_history.deals:
|
| 206 |
+
self.seller.update_career_info(self.career_history.capitulation_rate)
|
| 207 |
+
|
| 208 |
+
from .seller import _pick_message
|
| 209 |
+
open_msg = _pick_message(
|
| 210 |
+
personality, "open", self.rng,
|
| 211 |
+
item=item, price=self.seller.anchor, cost=self.task.seller_cost,
|
| 212 |
+
)
|
| 213 |
+
|
| 214 |
+
obs = BazaarObservation(
|
| 215 |
+
current_round=0,
|
| 216 |
+
max_rounds=self.seller.max_rounds,
|
| 217 |
+
own_last_offer=None,
|
| 218 |
+
opponent_last_offer=self.seller.anchor,
|
| 219 |
+
own_private_deadline=self.task.buyer_deadline,
|
| 220 |
+
own_private_budget=self.buyer_budget,
|
| 221 |
+
rounds_remaining=self.seller.max_rounds,
|
| 222 |
+
seller_last_move_delta=None,
|
| 223 |
+
item_name=item,
|
| 224 |
+
seller_asking_price=self.seller.anchor,
|
| 225 |
+
seller_personality=self.task.seller_personality,
|
| 226 |
+
episode_number=self.current_episode,
|
| 227 |
+
total_episodes=self.total_episodes,
|
| 228 |
+
career_history=self.career_history if self.task.enable_career else None,
|
| 229 |
+
done=False,
|
| 230 |
+
message=f'Seller opens: "{open_msg}"',
|
| 231 |
+
)
|
| 232 |
+
|
| 233 |
+
self.offer_history.append({
|
| 234 |
+
"round": 0,
|
| 235 |
+
"actor": "seller",
|
| 236 |
+
"action": "open",
|
| 237 |
+
"price": self.seller.anchor,
|
| 238 |
+
})
|
| 239 |
+
|
| 240 |
+
self._snapshot()
|
| 241 |
+
return obs
|
| 242 |
+
|
| 243 |
+
def step(self, action: BazaarAction) -> tuple[BazaarObservation, BazaarReward]:
|
| 244 |
+
"""Process buyer action and return new observation + reward."""
|
| 245 |
+
if self.done:
|
| 246 |
+
obs = self._make_obs(message="Negotiation already concluded.")
|
| 247 |
+
obs.done = True
|
| 248 |
+
return obs, BazaarReward(reward=0.0, terminal=True)
|
| 249 |
+
|
| 250 |
+
self._snapshot()
|
| 251 |
+
self.current_round += 1
|
| 252 |
+
reward_components: dict[str, float] = {}
|
| 253 |
+
penalty = 0.0
|
| 254 |
+
|
| 255 |
+
# Validate action
|
| 256 |
+
if action.action == ActionType.OFFER:
|
| 257 |
+
if action.price is None:
|
| 258 |
+
action.price = self.buyer_budget * 0.5
|
| 259 |
+
if action.price < 0 or action.price > self.buyer_budget:
|
| 260 |
+
penalty -= 0.2
|
| 261 |
+
reward_components["out_of_range_penalty"] = -0.2
|
| 262 |
+
action.price = max(0, min(action.price, self.buyer_budget))
|
| 263 |
+
|
| 264 |
+
if self._last_buyer_offer is not None and abs(action.price - self._last_buyer_offer) < 0.5:
|
| 265 |
+
self._repeated_offers += 1
|
| 266 |
+
if self._repeated_offers >= 3:
|
| 267 |
+
penalty -= 0.1
|
| 268 |
+
reward_components["stalling_penalty"] = -0.1
|
| 269 |
+
else:
|
| 270 |
+
self._repeated_offers = 0
|
| 271 |
+
self._last_buyer_offer = action.price
|
| 272 |
+
|
| 273 |
+
# Record buyer action
|
| 274 |
+
self.offer_history.append({
|
| 275 |
+
"round": self.current_round,
|
| 276 |
+
"actor": "buyer",
|
| 277 |
+
"action": action.action.value,
|
| 278 |
+
"price": action.price,
|
| 279 |
+
})
|
| 280 |
+
|
| 281 |
+
# Process action
|
| 282 |
+
if action.action == ActionType.WALK:
|
| 283 |
+
return self._handle_walk(reward_components, penalty)
|
| 284 |
+
elif action.action == ActionType.ACCEPT:
|
| 285 |
+
return self._handle_accept(reward_components, penalty)
|
| 286 |
+
else:
|
| 287 |
+
return self._handle_offer(action.price, reward_components, penalty)
|
| 288 |
+
|
| 289 |
+
def _handle_walk(self, components: dict, penalty: float) -> tuple[BazaarObservation, BazaarReward]:
|
| 290 |
+
self.done = True
|
| 291 |
+
walk_penalty = -0.3
|
| 292 |
+
components["walk_penalty"] = walk_penalty
|
| 293 |
+
total = walk_penalty + penalty
|
| 294 |
+
|
| 295 |
+
self._record_deal(DealOutcome.WALK, None, self.current_round)
|
| 296 |
+
|
| 297 |
+
obs = self._make_obs(message="You walk away from the deal.")
|
| 298 |
+
obs.done = True
|
| 299 |
+
obs.deal_outcome = DealOutcome.WALK
|
| 300 |
+
|
| 301 |
+
reward = BazaarReward(reward=total, terminal=True, components=components)
|
| 302 |
+
self.step_rewards.append(total)
|
| 303 |
+
self.cumulative_reward += total
|
| 304 |
+
return obs, reward
|
| 305 |
+
|
| 306 |
+
def _handle_accept(self, components: dict, penalty: float) -> tuple[BazaarObservation, BazaarReward]:
|
| 307 |
+
if self.seller is None or not self.seller.offer_history:
|
| 308 |
+
obs = self._make_obs(message="No seller offer to accept yet. Make an offer first.")
|
| 309 |
+
reward = BazaarReward(reward=-0.1 + penalty, terminal=False, components={"invalid_accept": -0.1})
|
| 310 |
+
self.step_rewards.append(reward.reward)
|
| 311 |
+
self.cumulative_reward += reward.reward
|
| 312 |
+
return obs, reward
|
| 313 |
+
|
| 314 |
+
agreed_price = self.seller.current_offer
|
| 315 |
+
return self._finalize_deal(agreed_price, components, penalty, buyer_accepted=True)
|
| 316 |
+
|
| 317 |
+
def _handle_offer(self, price: float, components: dict, penalty: float) -> tuple[BazaarObservation, BazaarReward]:
|
| 318 |
+
assert self.seller is not None
|
| 319 |
+
|
| 320 |
+
seller_action, seller_price, tell, msg = self.seller.respond(price, self.current_round)
|
| 321 |
+
|
| 322 |
+
# Build conversation history for NLP context (last 4 turns)
|
| 323 |
+
recent_history = [
|
| 324 |
+
f"{h['actor']}: {h.get('price', '')}" for h in self.offer_history[-4:]
|
| 325 |
+
]
|
| 326 |
+
|
| 327 |
+
# Record tell β NLP layer blends language signals into rule-based tells
|
| 328 |
+
use_nlp = getattr(self.task, "enable_nlp", False)
|
| 329 |
+
tell_model = _tell_to_model(tell, message=msg, history=recent_history, use_nlp=use_nlp)
|
| 330 |
+
if tell_model and self.task.enable_tells:
|
| 331 |
+
self.tells_history.append(tell_model)
|
| 332 |
+
|
| 333 |
+
if seller_action == "accept":
|
| 334 |
+
self.offer_history.append({
|
| 335 |
+
"round": self.current_round,
|
| 336 |
+
"actor": "seller",
|
| 337 |
+
"action": "accept",
|
| 338 |
+
"price": price,
|
| 339 |
+
})
|
| 340 |
+
return self._finalize_deal(price, components, penalty, buyer_accepted=False, message=msg)
|
| 341 |
+
|
| 342 |
+
elif seller_action == "walk":
|
| 343 |
+
self.done = True
|
| 344 |
+
components["seller_walked"] = -0.2
|
| 345 |
+
self._record_deal(DealOutcome.WALK, None, self.current_round)
|
| 346 |
+
|
| 347 |
+
obs = self._make_obs(message=f'Seller: "{msg}"')
|
| 348 |
+
obs.done = True
|
| 349 |
+
obs.deal_outcome = DealOutcome.WALK
|
| 350 |
+
obs.tells = tell_model if self.task.enable_tells else None
|
| 351 |
+
|
| 352 |
+
total = -0.2 + penalty
|
| 353 |
+
reward = BazaarReward(reward=total, terminal=True, components=components)
|
| 354 |
+
self.step_rewards.append(total)
|
| 355 |
+
self.cumulative_reward += total
|
| 356 |
+
return obs, reward
|
| 357 |
+
|
| 358 |
+
else: # counter
|
| 359 |
+
self.offer_history.append({
|
| 360 |
+
"round": self.current_round,
|
| 361 |
+
"actor": "seller",
|
| 362 |
+
"action": "counter",
|
| 363 |
+
"price": seller_price,
|
| 364 |
+
})
|
| 365 |
+
|
| 366 |
+
# Partial progress reward
|
| 367 |
+
initial_gap = self.seller.anchor - 0
|
| 368 |
+
current_gap = abs(seller_price - price)
|
| 369 |
+
if len(self.offer_history) >= 4:
|
| 370 |
+
prev_seller = [h["price"] for h in self.offer_history if h["actor"] == "seller" and h["price"] is not None]
|
| 371 |
+
prev_buyer = [h["price"] for h in self.offer_history if h["actor"] == "buyer" and h["price"] is not None]
|
| 372 |
+
if len(prev_seller) >= 2 and len(prev_buyer) >= 2:
|
| 373 |
+
old_gap = abs(prev_seller[-2] - prev_buyer[-2])
|
| 374 |
+
gap_reduction = old_gap - current_gap
|
| 375 |
+
if gap_reduction > 0 and initial_gap > 0:
|
| 376 |
+
progress = 0.05 * (gap_reduction / initial_gap)
|
| 377 |
+
components["gap_narrowing"] = round(progress, 4)
|
| 378 |
+
|
| 379 |
+
# Check if max rounds exceeded
|
| 380 |
+
rounds_per_ep = self.seller.max_rounds
|
| 381 |
+
if self.current_round >= rounds_per_ep:
|
| 382 |
+
self.done = True
|
| 383 |
+
self._record_deal(DealOutcome.EXPIRED, None, self.current_round)
|
| 384 |
+
|
| 385 |
+
obs = self._make_obs(message="Time's up. No deal reached.")
|
| 386 |
+
obs.done = True
|
| 387 |
+
obs.deal_outcome = DealOutcome.EXPIRED
|
| 388 |
+
obs.tells = tell_model if self.task.enable_tells else None
|
| 389 |
+
components["expired_penalty"] = -0.15
|
| 390 |
+
total = sum(components.values()) + penalty
|
| 391 |
+
reward = BazaarReward(reward=total, terminal=True, components=components)
|
| 392 |
+
self.step_rewards.append(total)
|
| 393 |
+
self.cumulative_reward += total
|
| 394 |
+
return obs, reward
|
| 395 |
+
|
| 396 |
+
# Seller delta
|
| 397 |
+
seller_delta = None
|
| 398 |
+
seller_offers = [h["price"] for h in self.offer_history if h["actor"] == "seller" and h["price"] is not None]
|
| 399 |
+
if len(seller_offers) >= 2:
|
| 400 |
+
seller_delta = round(seller_offers[-2] - seller_offers[-1], 2)
|
| 401 |
+
|
| 402 |
+
total = sum(components.values()) + penalty
|
| 403 |
+
obs = self._make_obs(message=f'Seller: "{msg}"')
|
| 404 |
+
obs.opponent_last_offer = seller_price
|
| 405 |
+
obs.own_last_offer = price
|
| 406 |
+
obs.seller_last_move_delta = seller_delta
|
| 407 |
+
obs.rounds_remaining = rounds_per_ep - self.current_round
|
| 408 |
+
obs.tells = tell_model if self.task.enable_tells else None
|
| 409 |
+
|
| 410 |
+
reward = BazaarReward(reward=total, terminal=False, components=components)
|
| 411 |
+
self.step_rewards.append(total)
|
| 412 |
+
self.cumulative_reward += total
|
| 413 |
+
return obs, reward
|
| 414 |
+
|
| 415 |
+
def _finalize_deal(
|
| 416 |
+
self, agreed_price: float, components: dict, penalty: float,
|
| 417 |
+
buyer_accepted: bool, message: str | None = None,
|
| 418 |
+
) -> tuple[BazaarObservation, BazaarReward]:
|
| 419 |
+
self.done = True
|
| 420 |
+
assert self.seller is not None
|
| 421 |
+
|
| 422 |
+
budget = self.buyer_budget
|
| 423 |
+
cost = self.seller.cost
|
| 424 |
+
surplus = budget - agreed_price
|
| 425 |
+
max_surplus = budget - cost
|
| 426 |
+
normalized_surplus = surplus / max_surplus if max_surplus > 0 else 0
|
| 427 |
+
normalized_surplus = max(0, min(1, normalized_surplus))
|
| 428 |
+
|
| 429 |
+
alpha, beta = 0.3, 2.5
|
| 430 |
+
t_frac = self.current_round / max(self.seller.max_rounds, 1)
|
| 431 |
+
time_discount = math.exp(-alpha * math.exp(beta * t_frac))
|
| 432 |
+
|
| 433 |
+
rep_leak = 0.0
|
| 434 |
+
if self.task.enable_career and len(self.career_history.deals) >= 3:
|
| 435 |
+
cap_rate = self.career_history.capitulation_rate
|
| 436 |
+
rep_leak = -0.1 * cap_rate
|
| 437 |
+
components["reputation_leak"] = rep_leak
|
| 438 |
+
|
| 439 |
+
capitulated = agreed_price > self.seller.anchor * 0.85
|
| 440 |
+
|
| 441 |
+
terminal_reward = normalized_surplus * time_discount
|
| 442 |
+
components["surplus"] = round(normalized_surplus, 4)
|
| 443 |
+
components["time_discount"] = round(time_discount, 4)
|
| 444 |
+
components["terminal_reward"] = round(terminal_reward, 4)
|
| 445 |
+
|
| 446 |
+
total = terminal_reward + rep_leak + penalty
|
| 447 |
+
total = max(0, min(1, total))
|
| 448 |
+
|
| 449 |
+
self._record_deal(DealOutcome.DEAL, agreed_price, self.current_round, capitulated)
|
| 450 |
+
self.remaining_bankroll -= agreed_price
|
| 451 |
+
|
| 452 |
+
if message is None:
|
| 453 |
+
msg = f"Deal! Agreed at {agreed_price:.0f} rupees."
|
| 454 |
+
if buyer_accepted:
|
| 455 |
+
msg = f"You accept the seller's offer of {agreed_price:.0f} rupees."
|
| 456 |
+
else:
|
| 457 |
+
msg = message
|
| 458 |
+
|
| 459 |
+
obs = self._make_obs(message=msg)
|
| 460 |
+
obs.done = True
|
| 461 |
+
obs.deal_outcome = DealOutcome.DEAL
|
| 462 |
+
|
| 463 |
+
reward = BazaarReward(reward=round(total, 4), terminal=True, components=components)
|
| 464 |
+
self.step_rewards.append(total)
|
| 465 |
+
self.cumulative_reward += total
|
| 466 |
+
return obs, reward
|
| 467 |
+
|
| 468 |
+
def _record_deal(self, outcome: DealOutcome, agreed_price: Optional[float], rounds: int, capitulated: bool = False):
|
| 469 |
+
surplus = 0.0
|
| 470 |
+
norm_surplus = 0.0
|
| 471 |
+
if agreed_price is not None:
|
| 472 |
+
surplus = self.buyer_budget - agreed_price
|
| 473 |
+
seller_cost_for_deal = (
|
| 474 |
+
self.seller.cost if self.seller is not None else self.task.seller_cost
|
| 475 |
+
)
|
| 476 |
+
max_surplus = self.buyer_budget - seller_cost_for_deal
|
| 477 |
+
norm_surplus = surplus / max_surplus if max_surplus > 0 else 0
|
| 478 |
+
|
| 479 |
+
record = DealRecord(
|
| 480 |
+
episode=self.current_episode,
|
| 481 |
+
outcome=outcome,
|
| 482 |
+
agreed_price=agreed_price,
|
| 483 |
+
rounds_taken=rounds,
|
| 484 |
+
buyer_surplus=surplus,
|
| 485 |
+
normalized_surplus=norm_surplus,
|
| 486 |
+
buyer_capitulated=capitulated,
|
| 487 |
+
)
|
| 488 |
+
self.career_history.deals.append(record)
|
| 489 |
+
self.episode_results.append(record)
|
| 490 |
+
|
| 491 |
+
deals = self.career_history.deals
|
| 492 |
+
k = min(len(deals), 10)
|
| 493 |
+
recent = deals[-k:]
|
| 494 |
+
cap_count = sum(1 for d in recent if d.buyer_capitulated)
|
| 495 |
+
self.career_history.capitulation_rate = cap_count / k
|
| 496 |
+
|
| 497 |
+
completed = [d for d in recent if d.outcome == DealOutcome.DEAL]
|
| 498 |
+
if completed:
|
| 499 |
+
self.career_history.avg_normalized_surplus = sum(d.normalized_surplus for d in completed) / len(completed)
|
| 500 |
+
self.career_history.avg_rounds_to_close = sum(d.rounds_taken for d in completed) / len(completed)
|
| 501 |
+
|
| 502 |
+
def _make_obs(self, message: str = "") -> BazaarObservation:
|
| 503 |
+
rounds_per_ep = self.seller.max_rounds if self.seller else self.task.max_steps
|
| 504 |
+
return BazaarObservation(
|
| 505 |
+
current_round=self.current_round,
|
| 506 |
+
max_rounds=rounds_per_ep,
|
| 507 |
+
own_last_offer=self._last_buyer_offer,
|
| 508 |
+
opponent_last_offer=self.seller.current_offer if self.seller else None,
|
| 509 |
+
own_private_deadline=self.task.buyer_deadline,
|
| 510 |
+
own_private_budget=self.buyer_budget,
|
| 511 |
+
rounds_remaining=max(0, rounds_per_ep - self.current_round),
|
| 512 |
+
seller_last_move_delta=None,
|
| 513 |
+
item_name=self._items[(self.current_episode - 1) % len(self._items)] if self.current_episode > 0 else "item",
|
| 514 |
+
seller_asking_price=self.seller.anchor if self.seller else 0,
|
| 515 |
+
seller_personality=self.task.seller_personality,
|
| 516 |
+
episode_number=self.current_episode,
|
| 517 |
+
total_episodes=self.total_episodes,
|
| 518 |
+
career_history=self.career_history if self.task.enable_career else None,
|
| 519 |
+
done=self.done,
|
| 520 |
+
message=message,
|
| 521 |
+
)
|
| 522 |
+
|
| 523 |
+
def get_state(self) -> EnvironmentState:
    """Return a full snapshot of the environment, including private fields.

    Unlike the buyer observation, this includes the seller's cost, the offer
    history, the cumulative reward and the tells history.
    """
    seller = self.seller
    # Without a seller, fall back to the task's step limit and a zero anchor.
    if seller:
        round_cap = seller.max_rounds
        anchor = seller.anchor
    else:
        round_cap = self.task.max_steps
        anchor = 0

    # Career statistics are exposed only when the task enables them.
    career = self.career_history if self.task.enable_career else None

    return EnvironmentState(
        task_name=self.task.name,
        episode=self.current_episode,
        total_episodes=self.total_episodes,
        current_round=self.current_round,
        max_rounds=round_cap,
        done=self.done,
        buyer_budget=self.buyer_budget,
        seller_cost=self.task.seller_cost,
        seller_anchor=anchor,
        seller_personality=self.task.seller_personality,
        offer_history=self.offer_history,
        career_history=career,
        cumulative_reward=self.cumulative_reward,
        tells_history=self.tells_history,
    )
|
| 540 |
+
|
| 541 |
+
@property
def all_episodes_done(self) -> bool:
    """True once the episode counter has reached the total and the current episode is done."""
    if not (self.current_episode >= self.total_episodes):
        return False
    return self.done
|
|
@@ -0,0 +1,539 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Training-friendly wrapper over BazaarEnvironment.
|
| 2 |
+
|
| 3 |
+
Exposes a minimal in-process API (no HTTP) for RL training. The wrapper:
|
| 4 |
+
|
| 5 |
+
- Accepts actions as plain dicts: ``{"action": "offer|accept|walk", "price": float | None}``.
|
| 6 |
+
- Emits observations as plain dicts with every field the LLM prompt needs.
|
| 7 |
+
- Terminates when the environment's current episode ends. For career tasks
|
| 8 |
+
(multiple episodes), call `reset_episode()` between episodes and sum
|
| 9 |
+
terminal rewards — each episode's terminal reward is the GRPO advantage unit.
|
| 10 |
+
- Provides `format_observation()` so the same prompt string is used at train
|
| 11 |
+
time and inference time.
|
| 12 |
+
- Provides `rollout_episode(policy_fn, ...)` as the GRPO rollout primitive:
|
| 13 |
+
returns a list of (prompt, action_text, reward) tuples plus the final
|
| 14 |
+
graded score.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
import copy
|
| 20 |
+
import json
|
| 21 |
+
import random
|
| 22 |
+
import textwrap
|
| 23 |
+
from typing import Any, Callable, Optional
|
| 24 |
+
|
| 25 |
+
from .environment import BazaarEnvironment
|
| 26 |
+
from .models import (
|
| 27 |
+
ActionType,
|
| 28 |
+
BazaarAction,
|
| 29 |
+
BazaarObservation,
|
| 30 |
+
SellerPersonalityType,
|
| 31 |
+
TaskConfig,
|
| 32 |
+
)
|
| 33 |
+
from .tasks import GRADERS, TASKS
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Keep in sync with inference.py's system prompt so training and eval
|
| 37 |
+
# see the same conditioning. Few-shot examples are inline so a cold
|
| 38 |
+
# (un-SFT'd) base model has the pattern to copy.
|
| 39 |
+
DEFAULT_SYSTEM_PROMPT = textwrap.dedent("""\
|
| 40 |
+
You are a buyer at an Indian bazaar. Your ONLY output is one JSON object.
|
| 41 |
+
|
| 42 |
+
Rules:
|
| 43 |
+
- Seller's opening price is inflated. Negotiate down.
|
| 44 |
+
- Never reveal your budget.
|
| 45 |
+
- Close early at a good price; don't grind for pennies.
|
| 46 |
+
- The "message" is what you'd actually say to the seller β short Hinglish/English line.
|
| 47 |
+
|
| 48 |
+
Output schema (pick ONE per turn):
|
| 49 |
+
{"action": "offer", "price": <number>, "message": "<one short line>"}
|
| 50 |
+
{"action": "accept", "price": null, "message": "<one short line>"}
|
| 51 |
+
{"action": "walk", "price": null, "message": "<one short line>"}
|
| 52 |
+
|
| 53 |
+
Examples:
|
| 54 |
+
|
| 55 |
+
Seller's ask: 100. Your budget: 200.
|
| 56 |
+
{"action": "offer", "price": 35, "message": "yaar 35 max, market mein isse kam mil jaata hai"}
|
| 57 |
+
|
| 58 |
+
Seller's ask: 45. Your budget: 200.
|
| 59 |
+
{"action": "accept", "price": null, "message": "okay deal"}
|
| 60 |
+
|
| 61 |
+
Seller's ask: 180. Your budget: 200.
|
| 62 |
+
{"action": "walk", "price": null, "message": "sorry boss, itna nahi de sakta"}
|
| 63 |
+
|
| 64 |
+
Output ONE JSON object. No prose outside JSON. No markdown. No thinking.
|
| 65 |
+
""")
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _obs_to_dict(obs: BazaarObservation) -> dict[str, Any]:
    """Dump an observation model to a plain dict via ``model_dump(mode="json")``."""
    as_dict = obs.model_dump(mode="json")
    return as_dict
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def format_observation(
    obs: dict[str, Any] | BazaarObservation,
    history: Optional[list[str]] = None,
) -> str:
    """Format an observation as the user-turn prompt.

    The original author notes that this is meant to mirror
    ``inference.py::build_user_prompt`` so the policy sees the same text at
    train and eval time (not verifiable from this file).

    The prompt is assembled line by line instead of dedenting one large
    f-string. Dedenting *after* interpolation breaks as soon as an
    interpolated fragment spans several lines (career block, tells block,
    two or more history entries, a multi-line seller message): the fragment's
    continuation lines sit at column 0, so the common margin is empty and the
    template keeps its source indentation. Building from lines makes the
    layout identical regardless of which optional sections are present.

    Args:
        obs: Observation as a plain dict, or a model object that
            ``_obs_to_dict`` can convert.
        history: Prior turn summaries; only the last six are rendered.

    Returns:
        The prompt text, ending with ``"Your move (JSON only):\\n"``.
    """
    if not isinstance(obs, dict):
        obs = _obs_to_dict(obs)

    def _or_na(key: str) -> Any:
        # A key that is present but None must read as "N/A" too; a plain
        # ``obs.get(key, "N/A")`` would render the string "None".
        value = obs.get(key)
        return "N/A" if value is None else value

    history_block = "\n".join((history or [])[-6:]) if history else "None"

    lines = [
        "--- Negotiation State ---",
        f"Item: {obs.get('item_name', 'item')}",
        f"Round: {obs.get('current_round', 0)} / {obs.get('max_rounds', 0)}",
        f"Rounds remaining: {obs.get('rounds_remaining', 0)}",
        f"Seller's current ask: {_or_na('opponent_last_offer')}",
        f"Your last offer: {_or_na('own_last_offer')}",
        f"Your private budget: {obs.get('own_private_budget', 0)}",
        f"Seller's opening price: {obs.get('seller_asking_price', 0)}",
    ]

    if obs.get("own_private_deadline"):
        lines.append(
            f"YOUR HARD DEADLINE: Round {obs['own_private_deadline']} "
            "(seller doesn't know this!)"
        )

    lines.append(f"Seller's last concession: {_or_na('seller_last_move_delta')} rupees")
    lines.append(f"Episode: {obs.get('episode_number', 1)} / {obs.get('total_episodes', 1)}")
    lines.append("")

    if obs.get("career_history"):
        ch = obs["career_history"]
        lines.extend([
            "--- Career History ---",
            f"Episodes completed: {len(ch.get('deals', []))}",
            f"Your capitulation rate: {ch.get('capitulation_rate', 0):.1%}",
            f"Avg surplus captured: {ch.get('avg_normalized_surplus', 0):.1%}",
            f"Avg rounds to close: {ch.get('avg_rounds_to_close', 0):.1f}",
        ])

    # Tells block: only rendered when the observation actually carries a
    # non-empty tells dict.
    tells = obs.get("tells")
    if tells:
        urgency = float(tells.get("verbal_urgency", 0.0))
        deception = float(tells.get("verbal_deception_cue", 0.0))
        confidence = float(tells.get("verbal_confidence", 0.5))
        concession = str(tells.get("concession_pattern", "steady"))
        emotional = float(tells.get("emotional_escalation", 0.0))
        repeat = int(tells.get("repeat_phrases", 0))
        condition = str(tells.get("condition_label", "unknown"))
        lines.extend([
            "--- Seller Tells (noisy signals — read with skepticism) ---",
            f"urgency: {urgency:.2f} deception_cue: {deception:.2f} confidence: {confidence:.2f}",
            f"concession_pattern: {concession} emotional_escalation: {emotional:.2f} repeat_phrases: {repeat}",
            f"condition: {condition}",
            "",
        ])

    lines.extend([
        "--- Recent History ---",
        history_block,
        "",
        f"Seller says: {obs.get('message', '')}",
        "",
        "Your move (JSON only):",
    ])
    return "\n".join(lines) + "\n"
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def strip_think_tags(chat_text: str) -> str:
    """Return ``chat_text`` unchanged; retained only for API compatibility.

    Historical note from the original author: the plan was to strip
    Qwen3.5's auto-injected ``<think>...</think>`` blocks from prompts and
    SFT targets, but the first SFT run happened before the strip was wired
    in, so the trained LoRA expects an empty think block before its JSON.
    Instead of redoing SFT, the chat template is left untouched and
    ``parse_action()`` discards the think block at parse time.
    """
    untouched = chat_text
    return untouched
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def parse_action(text: str, fallback_price: float = 30.0) -> dict[str, Any]:
    """Best-effort JSON parser for LLM action output.

    Tolerates markdown fences, leading prose, trailing commentary (including
    commentary that itself contains braces) and reasoning-mode
    ``<think>...</think>`` blocks. The first decodable JSON object in the
    text decides the result.

    Args:
        text: Raw model completion.
        fallback_price: Price used for the fallback offer.

    Returns:
        The parsed action dict, with ``"message"`` defaulted to ``""``, when
        its ``"action"`` is one of ``offer``/``accept``/``walk``. Otherwise a
        conservative fallback ``{"action": "offer", "price": fallback_price,
        "message": "", "_parse_error": True}`` so training never crashes on
        a bad generation.
    """
    import re

    def _fallback() -> dict[str, Any]:
        return {"action": "offer", "price": fallback_price, "message": "", "_parse_error": True}

    # Drop any <think>...</think> blocks before looking for JSON.
    s = re.sub(r"<think>.*?</think>", "", text.strip(), flags=re.DOTALL).strip()

    # If the model fenced its answer, keep only the first fenced segment and
    # drop an optional "json" language tag.
    if "```" in s:
        s = s.split("```")[1]
        if s.lstrip().startswith("json"):
            s = s.lstrip()[4:]

    # Try each "{" in turn. raw_decode stops at the end of the first complete
    # JSON value, so trailing text (even text containing "}") is ignored,
    # unlike slicing from the first "{" to the last "}".
    decoder = json.JSONDecoder()
    parsed = None
    pos = s.find("{")
    while pos >= 0:
        try:
            parsed, _ = decoder.raw_decode(s, pos)
            break
        except (ValueError, RecursionError):
            # Not valid JSON from this brace (or absurdly nested); move on.
            pos = s.find("{", pos + 1)

    if not isinstance(parsed, dict):
        return _fallback()
    if parsed.get("action") not in ("offer", "accept", "walk"):
        return _fallback()
    # Ensure message field exists (older models may not return it).
    parsed.setdefault("message", "")
    return parsed
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def steer_bayesian_action(
    obs: dict[str, Any] | BazaarObservation,
    proposed_action: dict[str, Any],
) -> dict[str, Any]:
    """Apply Bayesian-persuasion-inspired steering + adaptive fallback.

    The model has incomplete information, so a compact posterior over seller
    urgency/flexibility is built from tells and concession behavior, then the
    raw model action is gated with:

    - a Nash-style target offer (under estimated seller cost),
    - an adaptive close threshold near deadline (to reduce unnecessary walks),
    - anti-premature-walk logic that prefers one more calibrated counter.

    Two invariants hold for every price this function computes itself:

    - it never exceeds the buyer's private budget (the late-round floor of
      ``0.87 * ask`` is capped at the budget);
    - it never falls below the buyer's previous offer, on the offer path and
      also when an ``accept`` or ``walk`` is rewritten into a counter-offer.

    Args:
        obs: Observation as a dict (or a ``BazaarObservation``, which is
            converted with ``_obs_to_dict``).
        proposed_action: Raw model action with ``action``, ``price`` and
            ``message`` keys (all optional).

    Returns:
        A new action dict with ``action``, ``price`` and ``message``. The
        message is re-rendered from ``nlp.templates.render`` whenever the
        action changed, the price moved by more than 10%, or the model gave
        no message.
    """
    if isinstance(obs, BazaarObservation):
        obs = _obs_to_dict(obs)

    original_action = str(proposed_action.get("action", "offer"))
    original_price = proposed_action.get("price")
    original_message = str(proposed_action.get("message") or "")

    action = {
        "action": original_action,
        "price": original_price,
        "message": original_message,
    }

    ask = float(obs.get("opponent_last_offer") or obs.get("seller_asking_price") or 0.0)
    budget = float(obs.get("own_private_budget") or 0.0)
    turn_index_early = int(obs.get("current_round") or 0)
    max_rounds_early = int(obs.get("max_rounds") or 8)
    if ask <= 0 or budget <= 0:
        # Not enough information to steer: only fill in a missing price and
        # a missing message, then pass the action through.
        if action["action"] == "offer" and action.get("price") is None:
            action["price"] = round(max(1.0, budget * 0.3 if budget > 0 else 30.0), 2)
        if not action.get("message"):
            from nlp.templates import render
            action["message"] = render(
                action["action"], action.get("price"),
                ask=ask, turn_index=turn_index_early, max_turns=max_rounds_early,
            )
        return action

    rounds_remaining = int(obs.get("rounds_remaining") or 0)
    max_rounds = max(1, int(obs.get("max_rounds") or rounds_remaining or 1))
    current_round = int(obs.get("current_round") or (max_rounds - rounds_remaining))
    # Fraction of the episode already used, clamped to [0, 1].
    late_pressure = max(0.0, min(1.0, current_round / max_rounds))

    # Personality-specific priors; unknown personalities get 0.50.
    personality = str(obs.get("seller_personality") or "default")
    prior_urgency = {
        "default": 0.50,
        "deceptive": 0.45,
        "impatient": 0.68,
        "collaborative": 0.40,
    }.get(personality, 0.50)
    prior_flex = {
        "default": 0.50,
        "deceptive": 0.30,
        "impatient": 0.65,
        "collaborative": 0.72,
    }.get(personality, 0.50)

    tells = obs.get("tells") or {}
    verbal_urgency = float(tells.get("verbal_urgency") or 0.0)
    fidget = float(tells.get("fidget_level") or 0.0)
    emotional = float(tells.get("emotional_escalation") or 0.0)
    deception = float(tells.get("verbal_deception_cue") or 0.0)
    confidence = float(tells.get("verbal_confidence") or 0.5)
    speed = str(tells.get("offer_speed") or "normal")
    concession_pattern = str(tells.get("concession_pattern") or "steady")

    speed_urgency = {"instant": 0.15, "normal": 0.05, "deliberate": -0.05}.get(speed, 0.0)
    pattern_urgency = {
        "front_loaded": 0.15,
        "erratic": 0.08,
        "stalling": -0.10,
        "steady": 0.00,
    }.get(concession_pattern, 0.0)
    signal_urgency = max(
        0.0,
        min(
            1.0,
            0.35 * verbal_urgency
            + 0.25 * fidget
            + 0.20 * emotional
            + 0.10 * deception
            + 0.10 * (1.0 - confidence)
            + speed_urgency
            + pattern_urgency,
        ),
    )

    seller_delta = float(obs.get("seller_last_move_delta") or 0.0)
    concession_ratio = max(0.0, min(1.0, seller_delta / max(ask, 1.0)))
    pattern_flex = {
        "front_loaded": 0.22,
        "steady": 0.08,
        "erratic": 0.03,
        "stalling": -0.18,
    }.get(concession_pattern, 0.0)
    signal_flex = max(
        0.0,
        min(
            1.0,
            0.45 * concession_ratio
            + 0.20 * (1.0 - confidence)
            + 0.20 * verbal_urgency
            + 0.15 * (1.0 - deception)
            + pattern_flex,
        ),
    )

    # Fixed 55/45 blend of prior and observed signal, clamped to [0, 1].
    posterior_urgency = max(0.0, min(1.0, 0.55 * prior_urgency + 0.45 * signal_urgency))
    posterior_flex = max(0.0, min(1.0, 0.55 * prior_flex + 0.45 * signal_flex))

    estimated_cost = ask * (0.58 - 0.18 * posterior_urgency + 0.08 * (1.0 - posterior_flex))
    estimated_cost = max(1.0, min(estimated_cost, ask * 0.90))

    # Nash bargaining point under estimated seller cost and inferred buyer power.
    buyer_power = 0.35 + 0.40 * posterior_urgency + 0.20 * posterior_flex - 0.30 * late_pressure
    buyer_power = max(0.20, min(0.85, buyer_power))
    nash_target = (1.0 - buyer_power) * budget + buyer_power * estimated_cost
    nash_target = max(1.0, min(nash_target, min(budget * 0.95, ask * 1.02)))

    # Adaptive fallback: grow acceptance threshold late so we close more often.
    close_slack = 0.28 + 0.45 * late_pressure + 0.12 * (1.0 - posterior_urgency)
    accept_threshold = nash_target + (budget - nash_target) * close_slack
    accept_threshold = min(accept_threshold, budget * 0.95)

    floor_offer = max(1.0, min(nash_target * 0.85, ask * 0.65, budget * 0.85))
    ceiling_offer = min(accept_threshold, ask * (0.90 + 0.08 * late_pressure))
    if rounds_remaining <= 2:
        # Push close to the ask in the final rounds, but never past what the
        # buyer can actually pay: uncapped, ask * 0.87 can exceed the budget.
        floor_offer = max(floor_offer, min(ask * 0.87, budget))
        ceiling_offer = max(ceiling_offer, floor_offer)
    if ceiling_offer < floor_offer:
        floor_offer = ceiling_offer

    own_last_offer = obs.get("own_last_offer")
    own_last_offer = float(own_last_offer) if own_last_offer is not None else None

    def _hold_line(price: float) -> float:
        """Return ``price``, raised to the previous offer if it would retreat below it."""
        if own_last_offer is not None and price < own_last_offer:
            return own_last_offer
        return price

    def _finalize(out: dict) -> dict:
        """Re-message via template if the steerer changed action or moved price by more than 10%."""
        new_action = out["action"]
        new_price = out.get("price")
        action_changed = new_action != original_action
        price_changed = (
            original_price is not None
            and new_price is not None
            and abs(float(new_price) - float(original_price)) / max(float(original_price), 1.0) > 0.10
        )
        if action_changed or price_changed or not original_message:
            from nlp.templates import render
            out["message"] = render(
                new_action, new_price,
                ask=ask, turn_index=current_round, max_turns=max_rounds,
            )
        else:
            out["message"] = original_message
        return out

    if action["action"] == "accept":
        if ask > accept_threshold and rounds_remaining > 1:
            # Too expensive to accept yet: counter at the clamped Nash target,
            # without undercutting our own previous offer.
            action["action"] = "offer"
            counter = max(floor_offer, min(ceiling_offer, nash_target))
            action["price"] = round(_hold_line(counter), 2)
        else:
            action["price"] = None
        return _finalize(action)

    if action["action"] == "walk":
        if rounds_remaining <= 1 and ask > budget * 0.98:
            action["price"] = None
            return _finalize(action)
        # Anti-premature walk: take one calibrated close attempt first.
        if ask <= accept_threshold and rounds_remaining <= 2:
            action["action"] = "accept"
            action["price"] = None
            return _finalize(action)
        action["action"] = "offer"
        probe_start = own_last_offer if own_last_offer is not None else floor_offer
        probe_price = max(floor_offer, min(ceiling_offer, probe_start + max(1.0, ask * 0.06)))
        # The ceiling may sit below our previous offer; hold rather than retreat.
        action["price"] = round(_hold_line(probe_price), 2)
        return _finalize(action)

    # Offer path: clip to Bayesian/Nash band and auto-close late if ask is acceptable.
    if rounds_remaining <= 1 and ask <= accept_threshold:
        return _finalize({"action": "accept", "price": None, "message": ""})

    proposed_price = action.get("price")
    if proposed_price is None:
        proposed_price = (floor_offer + ceiling_offer) / 2
    proposed_price = float(proposed_price)
    steered_price = max(floor_offer, min(ceiling_offer, proposed_price))
    # Buyer offers must move monotonically toward seller. If the model proposes
    # a price below our previous offer (which the seller has already implicitly
    # rejected by countering), bump up to at least last + a small concession
    # toward the seller's ask. Without this the buyer can slide *backward*
    # mid-negotiation, which sellers correctly read as either incoherent or
    # bad-faith.
    if own_last_offer is not None and steered_price < own_last_offer:
        gap = max(0.0, ask - own_last_offer)
        bump = max(1.0, gap * 0.15)
        # Hold at last offer if ceiling has fallen below it; never retreat.
        # Clamping to min(ceiling, ...) alone would let the ceiling drag us
        # backward — the exact bug the guard is meant to prevent.
        steered_price = max(own_last_offer, min(ceiling_offer, own_last_offer + bump))
    action["price"] = round(steered_price, 2)
    action["action"] = "offer"
    return _finalize(action)
|
| 407 |
+
|
| 408 |
+
|
| 409 |
+
class BazaarGymEnv:
    """Minimal gym-like wrapper over BazaarEnvironment for in-process training."""

    def __init__(
        self,
        task_name: str = "single_deal",
        seed: Optional[int] = None,
        personality_override: Optional[str] = None,
    ):
        """Select a task template and optionally override the seller personality.

        Raises:
            ValueError: If ``task_name`` is not a key of ``TASKS``.
        """
        if task_name not in TASKS:
            raise ValueError(
                f"Unknown task: {task_name}. Available: {list(TASKS.keys())}"
            )
        self.task_name = task_name
        self.seed = seed
        # Work on a private copy so the shared TASKS entry is never mutated.
        task_template = copy.deepcopy(TASKS[task_name])
        if personality_override:
            task_template.seller_personality = SellerPersonalityType(
                personality_override
            )
        self._base_task = task_template
        self._env: Optional[BazaarEnvironment] = None
        # No episode is running until reset() is called.
        self.done: bool = True

    def reset(self) -> tuple[dict[str, Any], dict[str, Any]]:
        """Create a fresh inner environment and return ``(observation, info)``."""
        fresh_task = copy.deepcopy(self._base_task)
        self._env = BazaarEnvironment(fresh_task, seed=self.seed)
        first_obs = self._env.reset()
        self.done = False
        return _obs_to_dict(first_obs), {}

    def step(
        self, action: dict[str, Any]
    ) -> tuple[dict[str, Any], float, bool, dict[str, Any]]:
        """Apply one action dict and return ``(observation, reward, done, info)``.

        Raises:
            RuntimeError: If called before ``reset()``.
        """
        if self._env is None:
            raise RuntimeError("Call reset() before step().")
        kind = ActionType(action.get("action", "offer"))
        bazaar_action = BazaarAction(action=kind, price=action.get("price"))
        next_obs, outcome = self._env.step(bazaar_action)
        # Episode-level done. For career tasks this is signalled at each
        # episode end so the outer loop can compute per-episode rewards.
        self.done = next_obs.done
        info = {
            "components": outcome.components,
            "episode": self._env.current_episode,
            "all_episodes_done": self._env.all_episodes_done,
        }
        return _obs_to_dict(next_obs), float(outcome.reward), self.done, info

    def score(self) -> float:
        """Final graded score across all completed episodes (0.0 if no env or no grader)."""
        inner = self._env
        grade_fn = GRADERS.get(inner.task.name) if inner is not None else None
        if grade_fn is None:
            return 0.0
        return float(grade_fn(inner.episode_results, inner.task))

    @property
    def env(self) -> BazaarEnvironment:
        """The inner environment; raises ``RuntimeError`` before ``reset()``."""
        inner = self._env
        if inner is None:
            raise RuntimeError("Environment not initialized; call reset().")
        return inner
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
PolicyFn = Callable[[str], str]
|
| 475 |
+
"""A policy takes a user-turn prompt and returns raw text (LLM completion)."""
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
def rollout_episode(
    policy_fn: PolicyFn,
    task_name: str = "single_deal",
    seed: Optional[int] = None,
    personality_override: Optional[str] = None,
    max_env_steps: int = 200,
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
) -> dict[str, Any]:
    """Run one episode with an LLM policy; return trajectory + score.

    Args:
        policy_fn: Maps the user-turn prompt to the raw completion text.
        task_name: Key into ``TASKS``.
        seed: Seed forwarded to the environment.
        personality_override: Optional seller personality to force.
        max_env_steps: Hard cap on the number of turns.
        system_prompt: Accepted for API compatibility; currently unused here
            because ``policy_fn`` only receives the user-turn prompt.

    Returns:
        A dict with keys:
            steps: list of {prompt, completion, action, reward, done,
                parse_error} per turn
            total_reward: sum of per-step rewards
            score: value returned by ``BazaarGymEnv.score()``
            num_steps: number of turns taken
            task: the task name
    """
    env = BazaarGymEnv(
        task_name=task_name, seed=seed, personality_override=personality_override
    )
    obs, _ = env.reset()
    history: list[str] = []
    steps: list[dict[str, Any]] = []
    total_reward = 0.0

    for _ in range(max_env_steps):
        prompt = format_observation(obs, history=history)
        completion = policy_fn(prompt)
        action = parse_action(completion, fallback_price=obs.get("own_private_budget", 100) * 0.3)

        obs, reward, done, info = env.step(action)
        total_reward += reward

        history.append(
            f"Round {obs.get('current_round', '?')}: You "
            f"{'offered ' + str(action.get('price')) if action.get('action') == 'offer' else action.get('action')}"
            f" -> Seller: {obs.get('message', '')}"
        )

        steps.append({
            "prompt": prompt,
            "completion": completion,
            "action": action,
            "reward": reward,
            "done": done,
            "parse_error": bool(action.get("_parse_error")),
        })

        if done:
            # Stop at the first episode boundary whether or not the whole
            # career is finished; multi-episode runs are left to the caller,
            # which can invoke rollout_episode once per episode.
            break

    return {
        "steps": steps,
        "total_reward": total_reward,
        "score": env.score(),
        "num_steps": len(steps),
        "task": task_name,
    }
|
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Real-world listing sampler for varied negotiation scenarios.
|
| 2 |
+
|
| 3 |
+
Loads the Amazon Sales CSV (committed at ``data/amazon.csv``) and exposes
|
| 4 |
+
``sample_listing(rng)``. Each listing provides ground-truth fair-market
|
| 5 |
+
anchors used to set buyer budget and seller cost per episode, so the model
|
| 6 |
+
sees a different item and price range every rollout instead of the 10
|
| 7 |
+
hardcoded bazaar items.
|
| 8 |
+
|
| 9 |
+
Price mapping (rupees):
|
| 10 |
+
actual_price -> seller opening anchor (MRP)
|
| 11 |
+
discounted_price -> realistic market price
|
| 12 |
+
seller_cost = discounted_price * 0.7 (below-market floor)
|
| 13 |
+
buyer_budget = actual_price (can afford MRP but wants lower)
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
import csv
|
| 19 |
+
import os
|
| 20 |
+
import random
|
| 21 |
+
import re
|
| 22 |
+
from functools import lru_cache
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from typing import Optional
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Path resolution: try repo root, then package-local data dir.
|
| 28 |
+
_CANDIDATES = [
|
| 29 |
+
Path(__file__).resolve().parent.parent / "data" / "amazon.csv",
|
| 30 |
+
Path(__file__).resolve().parent / "data" / "amazon.csv",
|
| 31 |
+
Path(os.getenv("BAZAARBOT_LISTINGS_CSV", "")),
|
| 32 |
+
]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _find_csv() -> Optional[Path]:
    """Return the first entry of ``_CANDIDATES`` that is an existing file, else None.

    ``is_file()`` is used instead of ``exists()`` because ``Path`` objects
    are always truthy and ``Path("")`` (what an unset
    ``BAZAARBOT_LISTINGS_CSV`` produces) is the current directory. That
    directory exists, so an ``exists()`` check would hand it back as if it
    were the CSV and the later ``open()`` would fail.
    """
    for candidate in _CANDIDATES:
        if candidate and candidate.is_file():
            return candidate
    return None
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _parse_rupees(s: str) -> Optional[float]:
    """Parse a price string such as '₹1,099' into 1099.0; None on failure."""
    if not s:
        return None
    # Keep digits and dots only; currency symbol, commas and spaces are dropped.
    numeric_text = re.sub(r"[^\d.]", "", s)
    if not numeric_text:
        return None
    try:
        amount = float(numeric_text)
    except ValueError:
        # e.g. "1.2.3" or a lone "." survives the filter but is not a number.
        return None
    return amount
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
@lru_cache(maxsize=1)
def _load_listings() -> list[dict]:
    """Load and filter listings from the CSV; the result is cached after the first call.

    Returns:
        A list of dicts with ``name``, ``full_name``, ``category``,
        ``actual_price`` and ``discounted_price``; empty when no CSV is
        found. The same list object is returned on every call, so callers
        should treat it as read-only.
    """
    csv_path = _find_csv()
    if csv_path is None:
        return []

    listings: list[dict] = []
    # newline="" is what the csv module requires of file objects; without it
    # newlines embedded in quoted fields are not interpreted correctly.
    with open(csv_path, encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            actual = _parse_rupees(row.get("actual_price", ""))
            discounted = _parse_rupees(row.get("discounted_price", ""))
            name = (row.get("product_name") or "").strip()
            if not name or actual is None or discounted is None:
                continue
            if actual <= 0 or discounted <= 0 or discounted >= actual:
                # require a real discount so there's negotiation room
                continue
            # Trim absurdly long product titles; keep the informative head.
            short_name = name.split(",")[0].strip()
            if len(short_name) > 80:
                short_name = short_name[:77] + "..."
            listings.append({
                "name": short_name,
                "full_name": name,
                # Only the first "|"-separated component of the category is kept.
                "category": (row.get("category") or "").split("|")[0].strip(),
                "actual_price": actual,
                "discounted_price": discounted,
            })
    return listings
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def num_listings() -> int:
    """Return how many usable listings the cached CSV load produced."""
    loaded = _load_listings()
    return len(loaded)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def sample_listing(rng: Optional[random.Random] = None) -> Optional[dict]:
    """Pick one listing at random and attach the derived bazaar parameters.

    ``rng`` supplies the randomness; when it is omitted the module-level
    ``random`` functions are used. Returns ``None`` when no listings are
    available.

    Return shape::

        {
            "name": str,
            "category": str,
            "actual_price": float,
            "discounted_price": float,
            "seller_cost": float,      # below-market floor
            "buyer_budget": float,     # MRP ceiling
            "seller_anchor": float,    # opening ask
            "fair_value": float,       # street price (hidden from buyer)
        }
    """
    pool = _load_listings()
    if not pool:
        return None

    picker = rng or random
    chosen = picker.choice(pool)
    mrp = chosen["actual_price"]
    street = chosen["discounted_price"]
    return {
        "name": chosen["name"],
        "category": chosen["category"],
        "actual_price": mrp,
        "discounted_price": street,
        # Seller cost is 70% of the street price.
        "seller_cost": round(street * 0.7, 2),
        "buyer_budget": round(mrp, 2),
        "seller_anchor": round(mrp, 2),
        "fair_value": round(street, 2),
    }
|
|
@@ -0,0 +1,453 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLM-backed seller for BazaarBATNA.
|
| 2 |
+
|
| 3 |
+
Implements docs/SELLER_HANDOFF.md interface:
|
| 4 |
+
LLMSeller(listing, role_brief, model).open() -> str
|
| 5 |
+
LLMSeller(...).respond(history, buyer_message, buyer_offer) -> SellerReply
|
| 6 |
+
|
| 7 |
+
Designed to run on a single 16GB GPU (Kaggle T4) at 4-bit. Default model is
|
| 8 |
+
Gemma-3-4B-Instruct (~3GB at 4-bit), which fits with headroom. Larger models
|
| 9 |
+
(e.g. gemma-2-9b-it) work too on T4 at 4-bit.
|
| 10 |
+
|
| 11 |
+
Hard rules enforced in code (not just prompt):
|
| 12 |
+
1. Never accept below reservation
|
| 13 |
+
2. Never leak reservation price in messages
|
| 14 |
+
3. Counter offers always >= reservation
|
| 15 |
+
4. Counter must improve on previous counter (monotone toward buyer)
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
|
| 20 |
+
import json
|
| 21 |
+
import re
|
| 22 |
+
from dataclasses import dataclass
|
| 23 |
+
from typing import Any, Literal, TypedDict
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class HistoryTurn(TypedDict):
    """One utterance in the conversation history passed to the seller."""

    # Who spoke.
    role: Literal["seller", "buyer"]
    # What was said.
    message: str
    # Price attached to the utterance, or None when there was none.
    price: float | None
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class SellerReply(TypedDict):
    """The seller's turn: what it says, what it does, and at what price."""

    # Text shown to the buyer.
    message: str
    # The move the seller makes this turn.
    action: Literal["counter", "accept", "walk"]
    # Price that goes with the action, or None when no price applies.
    price: float | None
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# ── Persona prompt fragments ────────────────────────────────────────
|
| 39 |
+
# One-line behavioural instruction per persona; the selected line is appended
# to the seller's system prompt. Persona names not listed here are treated as
# "default" by LLMSeller.
PERSONA_GUIDANCE = {
    "default": "Stay balanced. Concede in moderate steps. Justify price with item details.",
    "firm": "Concede slowly. Defend your asking price with specific details from the listing.",
    "flexible": "Be warm and willing to deal, but still profit-seeking — don't capitulate.",
    "deceptive": "Use bluffs about other interested buyers and time pressure to push the price up.",
}
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# ── Lazy model bundle cache ─────────────────────────────────────────
|
| 48 |
+
@dataclass
class _Bundle:
    """A tokenizer and the model it belongs to, kept together as one unit."""

    # Typed as Any so this module can be imported without the ML libraries.
    tokenizer: Any
    model: Any
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
_MODEL_CACHE: dict[str, _Bundle] = {}
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def _load_bundle(model_name: str) -> _Bundle:
    """Return the tokenizer/model pair for ``model_name``, loading it on first use.

    Loaded bundles are stored in ``_MODEL_CACHE`` keyed by model name. The
    heavy imports happen inside the function so the module stays importable
    on machines without torch installed (e.g. lint, doc generation).

    The ``SELLER_DTYPE`` environment variable selects the weight format when
    CUDA is available: ``"4bit"`` (default, NF4 quantization), ``"fp16"``, or
    anything else for bfloat16. Without CUDA the model loads in float32.
    """
    bundle = _MODEL_CACHE.get(model_name)
    if bundle is not None:
        return bundle

    import os as _os

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    torch.backends.cuda.matmul.allow_tf32 = True

    # Tokenizer loading on some environments (notably Kaggle) can hit a
    # SentencePiece error: `TypeError: not a string` when a Path-like is passed
    # into `SentencePieceProcessor.Load`. In that case build GemmaTokenizer
    # directly from a string path to tokenizer.model.
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True)
    except TypeError as e:
        if "not a string" not in str(e).lower():
            raise
        try:
            from huggingface_hub import hf_hub_download
            from transformers import GemmaTokenizer

            vocab_path = hf_hub_download(
                repo_id=model_name,
                filename="tokenizer.model",
                token=True,
            )
            tokenizer = GemmaTokenizer(vocab_file=str(vocab_path))
        except Exception:
            # If the fallback fails too, surface the original, more informative error.
            raise e
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # SELLER_DTYPE env-var: "4bit" (default), "bf16", "fp16"
    requested = _os.environ.get("SELLER_DTYPE", "4bit").lower()
    load_kwargs: dict[str, Any] = {"device_map": "auto", "trust_remote_code": True}
    if not torch.cuda.is_available():
        load_kwargs["torch_dtype"] = torch.float32
    elif requested == "4bit":
        load_kwargs["torch_dtype"] = torch.bfloat16
        load_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
        )
    elif requested == "fp16":
        load_kwargs["torch_dtype"] = torch.float16
    else:
        load_kwargs["torch_dtype"] = torch.bfloat16

    model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
    model.config.use_cache = True

    bundle = _Bundle(tokenizer=tokenizer, model=model)
    _MODEL_CACHE[model_name] = bundle
    return bundle
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
# ── Helpers ─────────────────────────────────────────────────────────
|
| 124 |
+
def _to_float(value: Any) -> float | None:
    """Coerce a loosely-typed price into a finite float, or ``None``.

    Accepts ints, floats and strings. Strings may carry a ``$`` or ``₹``
    sign, thousands commas, surrounding whitespace, and a leading ``Rs`` /
    ``Rs.`` / ``INR``. Returns ``None`` for ``None``, booleans, unparseable
    strings, unsupported types, and NaN or infinite values.
    """
    import math  # local import: this block only needs it here

    # bool is a subclass of int; a JSON `true` must not turn into a price of 1.0.
    if value is None or isinstance(value, bool):
        return None

    if isinstance(value, (int, float)):
        try:
            number = float(value)
        except OverflowError:  # int too large to represent as a float
            return None
    elif isinstance(value, str):
        cleaned = value.strip()
        for symbol in ("$", "₹", ","):
            cleaned = cleaned.replace(symbol, "")
        cleaned = re.sub(r"^(?:rs\.?|inr)\s*", "", cleaned.strip(), flags=re.IGNORECASE)
        try:
            number = float(cleaned)
        except ValueError:
            return None
    else:
        return None

    # float() also parses "nan" and "inf"; neither is usable as a price and
    # NaN would slip through every later min()/max() guard.
    return number if math.isfinite(number) else None
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def _extract_json(text: str) -> dict[str, Any] | None:
    """Best-effort extraction of one JSON object from LLM output.

    Markdown code fences are removed first. The object starting at the first
    ``{`` is decoded if it is valid JSON, ignoring anything the model wrote
    after it. Otherwise the span from the first ``{`` to the last ``}`` is
    repaired for two common failure modes (single quotes, trailing commas)
    and parsed again. Returns ``None`` when no object can be recovered.
    """
    if not text:
        return None
    cleaned = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).strip()

    start = cleaned.find("{")
    end = cleaned.rfind("}")
    if start == -1 or end < start:
        return None

    # raw_decode stops at the end of the first complete value, so trailing
    # chatter, even chatter containing braces, does not break the parse.
    try:
        obj, _ = json.JSONDecoder().raw_decode(cleaned, start)
    except json.JSONDecodeError:
        obj = None
    if isinstance(obj, dict):
        return obj

    # Common failure modes: single quotes, trailing commas
    repaired = cleaned[start : end + 1].replace("'", '"')
    repaired = re.sub(r",\s*\}", "}", repaired)
    repaired = re.sub(r",\s*\]", "]", repaired)
    try:
        parsed = json.loads(repaired)
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def _chat(
    model_name: str,
    system: str,
    user: str,
    max_new_tokens: int = 200,
    temperature: float = 0.3,
) -> str:
    """Run one system+user exchange through the model and return its reply text.

    The prompt is built with the tokenizer's chat template when one is set;
    otherwise (or if applying it raises) a Gemma-style prompt is assembled by
    hand with the system text folded into the user turn. Sampling is used
    when ``temperature`` is positive, greedy decoding otherwise. Only the
    newly generated tokens are decoded, with special tokens removed and
    surrounding whitespace stripped.
    """
    import torch

    bundle = _load_bundle(model_name)
    tok = bundle.tokenizer

    # Some models (e.g. gemma-4-E4B) ship without tokenizer.chat_template set.
    # Try apply_chat_template; if it is unavailable or fails, fall through to
    # the manual prompt below.
    prompt = None
    template_available = (
        hasattr(tok, "apply_chat_template")
        and getattr(tok, "chat_template", None) is not None
    )
    if template_available:
        conversation = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]
        try:
            prompt = tok.apply_chat_template(
                conversation, tokenize=False, add_generation_prompt=True
            )
        except Exception:
            prompt = None
    if prompt is None:
        # Gemma format: <start_of_turn>role\ncontent<end_of_turn>
        # System gets folded into user (Gemma doesn't have a system role).
        merged = f"{system}\n\n{user}" if system else user
        prompt = (
            f"<start_of_turn>user\n{merged}<end_of_turn>\n"
            f"<start_of_turn>model\n"
        )

    # Resolve stop tokens. Gemma uses <end_of_turn>; Llama-3.1 uses <|eot_id|>.
    # Default eos alone often doesn't fire on chat-formatted prompts, so the
    # model would run to max_new_tokens instead of stopping after the
    # assistant message.
    stop_ids: list[int] = []
    if isinstance(tok.eos_token_id, int):
        stop_ids.append(tok.eos_token_id)
    for marker in ("<end_of_turn>", "<|eot_id|>"):
        marker_id = tok.convert_tokens_to_ids(marker)
        if not isinstance(marker_id, int):
            continue
        if marker_id == tok.unk_token_id or marker_id in stop_ids:
            continue
        stop_ids.append(marker_id)

    encoded = tok(prompt, return_tensors="pt", truncation=True, max_length=2048).to(bundle.model.device)

    generation_args: dict[str, Any] = {
        "max_new_tokens": max_new_tokens,
        "pad_token_id": tok.eos_token_id,
        "eos_token_id": stop_ids if stop_ids else tok.eos_token_id,
    }
    if temperature > 0:
        generation_args.update({"do_sample": True, "temperature": temperature, "top_p": 0.9})
    else:
        generation_args["do_sample"] = False

    with torch.inference_mode():
        generated = bundle.model.generate(**encoded, **generation_args)
        # Drop the echoed prompt; keep only what the model added.
        prompt_len = encoded["input_ids"].shape[1]
        completion_ids = generated[0][prompt_len:]
    return tok.decode(completion_ids, skip_special_tokens=True).strip()
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
def generate_structured_reply(
    model: str,
    system: str,
    user: str,
    max_new_tokens: int = 200,
    temperature: float = 0.3,
) -> dict[str, Any] | None:
    """Run one chat completion and return the JSON object found in the reply.

    Returns ``None`` when the reply contains no parseable JSON object.
    Public helper used by eval/seller_quality.py for the persona judge.
    """
    completion = _chat(model, system, user, max_new_tokens, temperature)
    return _extract_json(completion)
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
# ── LLMSeller ───────────────────────────────────────────────────────
|
| 240 |
+
class LLMSeller:
    """Gemma-backed seller with hard reservation/leak guards.

    The LLM proposes a message, an action and a price. :meth:`respond` then
    enforces in code that the seller never accepts or counters below
    ``reservation``, never quotes the reservation in a message, and never
    raises a counter above its previous one.
    """

    def __init__(
        self,
        listing: dict,
        role_brief: dict,
        model: str = "google/gemma-4-E4B",
    ):
        """Set up prices, persona and prompt material for one negotiation.

        Args:
            listing: Item data. Reads ``title`` (or ``name``), ``category``,
                ``description`` (a string or a list of strings) and ``price``.
            role_brief: Seller instructions. Reads ``asking_price``,
                ``reservation_price`` and ``persona``.
            model: Model name passed to ``generate_structured_reply``.
        """
        self.listing = listing
        self.role_brief = role_brief
        self.model = model

        # Accept "name" as well as "title": listings produced by
        # sample_listing() carry the item under "name".
        self.title = str(listing.get("title") or listing.get("name") or "this item")
        self.category = str(listing.get("category") or "item")
        desc = listing.get("description") or ""
        if isinstance(desc, list):
            desc = " ".join(str(x) for x in desc)
        self.description = str(desc)[:1400]  # cap for prompt budget

        # Asking price: role brief first, then the listing price, then 100.
        asking = _to_float(role_brief.get("asking_price"))
        if asking is None:
            asking = _to_float(listing.get("price")) or 100.0
        self.asking = float(asking)

        # Reservation defaults to 78% of asking and is kept within
        # [1.0, 97% of asking] so there is always some room to concede.
        reservation = _to_float(role_brief.get("reservation_price"))
        if reservation is None:
            reservation = self.asking * 0.78
        self.reservation = max(1.0, min(float(reservation), self.asking * 0.97))

        persona = str(role_brief.get("persona", "default")).lower().strip()
        self.persona = persona if persona in PERSONA_GUIDANCE else "default"

        # Most recent price the seller has put on the table.
        self._last_counter = self.asking

    # ── Prompt construction ─────────────────────────────────────
    def _system_prompt(self) -> str:
        """Build the system prompt: rules, persona line, and listing details.

        The reservation price is deliberately not included.
        """
        return (
            "You are a Craigslist seller negotiating with a buyer. Your goal is to CLOSE A DEAL "
            "above your minimum, not to walk away. \n\n"
            "RULES:\n"
            "- Stay grounded in the listing — only reference details from it.\n"
            "- Never reveal your minimum/reservation price.\n"
            "- Never accept below your minimum.\n"
            "- Counter low offers — do NOT walk on the first lowball.\n"
            "- Walk only as a last resort, after multiple bad-faith offers.\n"
            "- Concede in steps; you want this sale.\n"
            "- Keep replies short and human (1-3 sentences).\n\n"
            f"Persona: {self.persona}. {PERSONA_GUIDANCE[self.persona]}\n\n"
            f"LISTING TITLE: {self.title}\n"
            f"CATEGORY: {self.category}\n"
            f"DESCRIPTION: {self.description}\n"
            f"ASKING PRICE: {self.asking:.2f}\n"
        )

    # ── Sanitization & guards ───────────────────────────────────
    def _sanitize(self, text: str) -> str:
        """Return ``text`` made safe to show: non-empty, leak-free, at most 320 chars.

        Empty input becomes a stock line. Any spelling of the reservation
        price (0, 1 or 2 decimals, with or without thousands commas) is
        replaced by "my minimum".
        """
        text = (text or "").strip()
        if not text:
            text = "I'm open to serious offers, but not at that price."
        # Redact any leak of the reservation price
        spellings = {
            f"{self.reservation:.2f}",
            f"{self.reservation:.1f}",
            f"{self.reservation:.0f}",
            f"{self.reservation:,.2f}",
            f"{self.reservation:,.1f}",
            f"{self.reservation:,.0f}",
        }
        # Longest first: otherwise the bare "78" can match inside "78.00"
        # and leave "my minimum.00" behind.
        for token in sorted(spellings, key=len, reverse=True):
            text = re.sub(rf"\b{re.escape(token)}\b", "my minimum", text)
        if len(text) > 320:
            text = text[:317].rstrip() + "..."
        return text

    def _next_counter(self, buyer_offer: float | None) -> float:
        """Concede toward buyer but never below reservation.

        With no buyer offer the last counter is repeated. Otherwise the step
        is the larger of 3% of asking and 35% of the gap between the last
        counter and the offer. Does not modify ``_last_counter``.
        """
        if buyer_offer is None:
            return round(max(self.reservation, self._last_counter), 2)
        gap = max(0.0, self._last_counter - buyer_offer)
        step = max(self.asking * 0.03, gap * 0.35)
        candidate = max(self._last_counter - step, self.reservation)
        return round(candidate, 2)

    def _heuristic_reply(self, buyer_offer: float | None) -> SellerReply:
        """Rule-based reply used when the LLM output cannot be parsed.

        Accepts any offer at or above reservation; otherwise counters.
        Side effect: a counter reply also updates ``_last_counter``.
        """
        if buyer_offer is None:
            return {"message": "What's your offer?", "action": "counter", "price": round(self._last_counter, 2)}
        if buyer_offer >= self.asking:
            return {"message": "Sounds good. Deal.", "action": "accept", "price": round(buyer_offer, 2)}
        if buyer_offer >= self.reservation:
            return {
                "message": f"You can have it at {buyer_offer:.0f}.",
                "action": "accept",
                "price": round(buyer_offer, 2),
            }
        # Never walk on a lowball — counter and let the buyer come up.
        # Offers under 50% of asking get a firmer reply: only 5% off the last
        # counter instead of the usual gap-based step.
        if buyer_offer < self.asking * 0.5:
            counter = max(self.reservation, self._last_counter * 0.95)
            self._last_counter = counter
            return {
                "message": f"That's far too low. I can do {counter:.0f}, take it or leave it.",
                "action": "counter",
                "price": round(counter, 2),
            }
        counter = self._next_counter(buyer_offer)
        self._last_counter = counter
        return {"message": f"I can do {counter:.0f}.", "action": "counter", "price": counter}

    # ── Public API ──────────────────────────────────────────────
    def open(self) -> str:
        """Return the seller's opening line.

        Uses the LLM's message when it returns one; otherwise a stock line
        naming the item and the asking price.
        """
        parsed = generate_structured_reply(
            self.model,
            self._system_prompt(),
            'Output JSON only: {"message": "<one short opening line>"}.',
            max_new_tokens=120,
            temperature=0.4,
        )
        if parsed and isinstance(parsed.get("message"), str):
            return self._sanitize(parsed["message"])
        return self._sanitize(f"Selling {self.title} at {self.asking:.0f}.")

    def respond(
        self,
        history: list[HistoryTurn],
        buyer_message: str,
        buyer_offer: float | None,
    ) -> SellerReply:
        """Produce the seller's next turn and update the internal counter.

        Args:
            history: Earlier turns; only the last 8 are shown to the LLM.
            buyer_message: What the buyer just said.
            buyer_offer: The buyer's current offer, or ``None``.

        Returns:
            The reply after the hard guards have been applied. ``price`` is
            ``None`` for a walk.
        """
        # _heuristic_reply concedes as a side effect. Remember where the
        # counter stood so that concession can be undone when the LLM's
        # reply is used, and so counters can be held at or below it.
        previous_counter = self._last_counter
        fallback = self._heuristic_reply(buyer_offer)

        # Compact recent history for the prompt
        lines = []
        for turn in history[-8:]:
            who = turn.get("role", "buyer")
            msg = str(turn.get("message", "")).strip()
            px = turn.get("price")
            px_part = "" if px is None else f" [${float(px):.2f}]"
            lines.append(f"{who}: {msg}{px_part}")
        history_block = "\n".join(lines) if lines else "(empty)"

        user_prompt = (
            'Return JSON only: {"message": str, "action": "counter|accept|walk", "price": number|null}.\n\n'
            f"Conversation:\n{history_block}\n\n"
            f"Buyer just said: {buyer_message}\n"
            f"Buyer offer: {buyer_offer}\n\n"
            "Rules: never accept below reservation; never reveal reservation; stay grounded in the listing."
        )

        parsed = generate_structured_reply(
            self.model,
            self._system_prompt(),
            user_prompt,
            max_new_tokens=120,
            temperature=0.35,
        )

        if not parsed:
            out: dict[str, Any] = dict(fallback)
        else:
            # The heuristic was only a stand-by; roll back its concession so
            # this turn does not concede twice.
            self._last_counter = previous_counter
            action = str(parsed.get("action", fallback["action"])).lower().strip()
            if action not in {"counter", "accept", "walk"}:
                action = fallback["action"]
            message = self._sanitize(str(parsed.get("message", fallback["message"])))
            price = _to_float(parsed.get("price"))
            out = {"message": message, "action": action, "price": price}

        # ── Hard guards ──────────────────────────────────────────
        if out["action"] == "accept":
            accept_at = buyer_offer if buyer_offer is not None else _to_float(out.get("price"))
            if accept_at is None or float(accept_at) < self.reservation:
                # Block illegal accept; rewrite as a counter
                out["action"] = "counter"
                out["price"] = self._next_counter(buyer_offer)
                # Record it so the next turn concedes from this price.
                self._last_counter = float(out["price"])
                out["message"] = self._sanitize(f"Can't go that low. I can do {out['price']:.0f}.")
            else:
                out["price"] = round(float(accept_at), 2)

        elif out["action"] == "counter":
            price = _to_float(out.get("price"))
            if price is None:
                price = self._next_counter(buyer_offer)

            # If buyer's offer is at/above our reservation, just accept it —
            # the LLM doesn't know the reservation so it'll keep countering
            # forever even when the deal is already good for us.
            if buyer_offer is not None and float(buyer_offer) >= self.reservation:
                out["action"] = "accept"
                out["price"] = round(float(buyer_offer), 2)
                out["message"] = self._sanitize("Alright, that works. Deal.")
            else:
                if buyer_offer is not None and price <= float(buyer_offer):
                    # A counter at or below the buyer's own offer makes no
                    # sense; bump it a small step above the offer. This runs
                    # before the reservation floor: applied after it, the
                    # check could never be true.
                    price = float(buyer_offer) + max(1.0, self.asking * 0.02)
                # Floor at reservation, and never go back up past the
                # previous counter (monotone toward the buyer).
                ceiling = max(previous_counter, self.reservation)
                price = min(max(float(price), self.reservation), ceiling)
                out["price"] = round(float(price), 2)
                self._last_counter = float(out["price"])

        else:  # walk
            # Anti-premature-walk: if early in negotiation (< 3 seller turns done),
            # override to a counter — buyers often need a few rounds to come up.
            seller_turns_so_far = sum(1 for t in history if t.get("role") == "seller")
            buyer_above_half_asking = (
                buyer_offer is not None and float(buyer_offer) >= self.asking * 0.5
            )
            if seller_turns_so_far < 3 and buyer_above_half_asking:
                counter = self._next_counter(buyer_offer)
                self._last_counter = counter
                out = {
                    "action": "counter",
                    "price": round(counter, 2),
                    "message": self._sanitize(
                        f"That's too low for what this is. I can do {counter:.0f}."
                    ),
                }
            else:
                out["price"] = None

        out["message"] = self._sanitize(str(out["message"]))
        return out  # type: ignore[return-value]
|
|
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic models for BazaarBot negotiation environment."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import enum
|
| 6 |
+
from typing import Optional
|
| 7 |
+
|
| 8 |
+
from pydantic import BaseModel, Field
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ActionType(str, enum.Enum):
    """Moves a buyer can make; members compare equal to their string value."""

    OFFER = "offer"
    ACCEPT = "accept"
    WALK = "walk"
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class DealOutcome(str, enum.Enum):
    """How a negotiation ended; members compare equal to their string value."""

    DEAL = "deal"
    WALK = "walk"
    EXPIRED = "expired"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class SellerPersonalityType(str, enum.Enum):
    """Seller personality presets; members compare equal to their string value."""

    DEFAULT = "default"
    DECEPTIVE = "deceptive"
    IMPATIENT = "impatient"
    COLLABORATIVE = "collaborative"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ── Tell model (observable signals) ──────────────────────────────
|
| 31 |
+
|
| 32 |
+
class TellObservation(BaseModel):
    """Observable seller tells -- poker/game-theory inspired signals.

    Each field is a noisy correlate of the seller's hidden state; an agent is
    expected to read patterns across rounds rather than trust a single value.
    Every field has a default, so an empty instance is valid.
    """

    # Verbal signals
    verbal_urgency: float = 0.0
    verbal_confidence: float = 0.5
    verbal_deception_cue: float = 0.0

    # Offer-behaviour signals (free-form labels)
    price_rounding: str = "round"
    offer_speed: str = "normal"
    concession_pattern: str = "steady"

    # Body-language signals
    fidget_level: float = 0.0
    eye_contact: str = "steady"
    posture: str = "neutral"

    # Conversation-dynamics signals
    repeat_phrases: int = 0
    topic_changes: int = 0
    emotional_escalation: float = 0.0

    # Condition/depreciation signals (NLP-extracted from listing text or utterance)
    condition_score: float = 1.0  # 0=junk, 1=mint
    depreciation_score: float = 0.0  # 0=none, 1=heavily worn
    condition_label: str = "unknown"  # new/like_new/very_good/good/acceptable/junk
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class DealRecord(BaseModel):
    """Summary of one finished negotiation episode."""

    # Required: which episode this was, how it ended, how many rounds it took.
    episode: int
    outcome: DealOutcome
    # Price the parties settled on; None by default.
    agreed_price: Optional[float] = None
    rounds_taken: int
    # Buyer-side results; all default to "nothing gained, no capitulation".
    buyer_surplus: float = 0.0
    normalized_surplus: float = 0.0
    buyer_capitulated: bool = False
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class CareerHistory(BaseModel):
    """Rolling window of past deal outcomes for career mode."""

    # Past episodes; default_factory gives each instance its own list.
    deals: list[DealRecord] = Field(default_factory=list)
    # Aggregate statistics; all default to 0.0.
    capitulation_rate: float = 0.0
    avg_normalized_surplus: float = 0.0
    avg_rounds_to_close: float = 0.0
    opponent_avg_offer_velocity: float = 0.0
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class BazaarObservation(BaseModel):
    """What the buyer agent sees each step.

    Every field has a default, so an empty observation is valid.
    """

    # Round bookkeeping and the latest offers from both sides
    current_round: int = 0
    max_rounds: int = 8
    own_last_offer: Optional[float] = None
    opponent_last_offer: Optional[float] = None
    own_private_deadline: Optional[int] = None
    own_private_budget: float = 100.0
    rounds_remaining: int = 8
    seller_last_move_delta: Optional[float] = None

    # Item info
    item_name: str = "item"
    seller_asking_price: float = 0.0

    # Seller personality (visible to buyer)
    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT

    # Observable tells
    tells: Optional[TellObservation] = None

    # Career history
    episode_number: int = 1
    total_episodes: int = 1
    career_history: Optional[CareerHistory] = None

    # Status
    done: bool = False
    deal_outcome: Optional[DealOutcome] = None
    message: str = ""
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class BazaarAction(BaseModel):
    """Buyer's action each step."""

    # Required: the move being made.
    action: ActionType
    # Price that goes with the move; None by default.
    price: Optional[float] = None
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
class BazaarReward(BaseModel):
    """Reward signal returned each step."""

    # Scalar reward for the step.
    reward: float = 0.0
    # Marks a terminal reward; False by default.
    terminal: bool = False
    # Named reward terms; default_factory gives each instance its own dict.
    components: dict[str, float] = Field(default_factory=dict)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
class TaskConfig(BaseModel):
    """Configuration for a specific task variant.

    ``name``, ``difficulty`` and ``description`` are required; everything
    else has a default.
    """

    # Identity
    name: str
    difficulty: str
    description: str

    # Episode shape
    max_steps: int = 8
    total_episodes: int = 1

    # Economics
    buyer_budget: float = 100.0
    seller_cost: float = 30.0
    seller_anchor_multiplier: float = 2.0
    seller_concession_rate: float = 0.08
    buyer_deadline: Optional[int] = None
    seller_inventory: int = 1
    seller_batna_probability: float = 0.1

    # Feature switches and scoring
    enable_career: bool = False
    success_threshold: float = 0.3
    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT
    enable_tells: bool = True
    # NLP tell extraction via Ollama (disable during fast GRPO rollouts)
    enable_nlp: bool = False

    # Multi-buyer mode
    num_buyers: int = 1
    enable_coalition: bool = False

    # Sample item + prices from data/amazon.csv per episode instead of the
    # hardcoded 10-item list. Buyer budget / seller cost become per-episode.
    use_real_listings: bool = False
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
class EnvironmentState(BaseModel):
    """Full serializable state for state() endpoint.

    The fields from `task_name` through `seller_anchor` are required; the
    remaining fields have defaults.
    """
    # Which task is running and where in its episode sequence we are.
    task_name: str
    episode: int
    total_episodes: int
    # Progress within the current negotiation.
    current_round: int
    max_rounds: int
    done: bool
    # Economic parameters in effect for this episode.
    buyer_budget: float
    seller_cost: float
    seller_anchor: float
    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT
    # Untyped dict records; schema is not visible here -- verify against the producer.
    offer_history: list[dict] = Field(default_factory=list)
    career_history: Optional[CareerHistory] = None
    cumulative_reward: float = 0.0
    # Tell observations collected so far; a fresh list per instance.
    tells_history: list[TellObservation] = Field(default_factory=list)
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
# ── Multi-buyer models ──────────────────────────────────────────
|
| 168 |
+
|
| 169 |
+
class BuyerIdentity(BaseModel):
    """Identity of a buyer in multi-buyer mode."""
    # Required identifier for the buyer.
    buyer_id: str
    # Display name; defaults to the generic "Buyer".
    name: str = "Buyer"
    # Defaults to False; presumably distinguishes human players from agents -- confirm.
    is_human: bool = False
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
class ArenaAction(BaseModel):
    """Action in multi-buyer arena.

    Carries the same `action`/`price` pair as a single-buyer action plus the
    acting buyer's id and an optional coalition signal.
    """
    # Id of the buyer taking the action.
    buyer_id: str
    action: ActionType
    price: Optional[float] = None
    # Coalition signals (visible to other buyers)
    # NOTE(review): typed as a free-form string; the values listed in the
    # trailing comment are not enforced by this model.
    signal: Optional[str] = None  # "cooperate", "compete", "bluff"
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
class ArenaObservation(BaseModel):
    """What a buyer sees in multi-buyer mode.

    Wraps a regular `BazaarObservation` with arena-level context.
    """
    # Id of the buyer this observation is addressed to.
    buyer_id: str
    # The buyer's own negotiation view.
    negotiation: BazaarObservation
    # What other buyers are doing (imperfect info)
    other_buyers_visible: list[dict] = Field(default_factory=list)
    # Coalition state
    coalition_signals: list[dict] = Field(default_factory=list)
    # Market info
    seller_attention: str = "you"  # who the seller is currently focused on
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
class ArenaState(BaseModel):
    """Full state of a multi-buyer arena.

    Only `arena_id` is required; all other fields have defaults.
    """
    arena_id: str
    # Participants; a fresh list per instance.
    buyers: list[BuyerIdentity] = Field(default_factory=list)
    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT
    current_round: int = 0
    max_rounds: int = 12
    done: bool = False
    # Per-buyer negotiation states
    # presumably keyed by buyer_id; the inner dict schema is not visible here -- verify
    buyer_states: dict[str, dict] = Field(default_factory=dict)
    # Both stay None until set; presumably filled in once a deal closes -- confirm.
    winner: Optional[str] = None
    deal_price: Optional[float] = None
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
# ── Leaderboard models ──────────────────────────────────────────
|
| 212 |
+
|
| 213 |
+
class LeaderboardEntry(BaseModel):
    """One leaderboard row: an agent's score on a task."""
    agent_name: str
    # Task identifier as a plain string.
    task: str
    score: float
    episodes_completed: int
    # Stored as a string; the format is not visible here -- TODO confirm (ISO 8601?).
    timestamp: str
    # Free-form extra data; a fresh dict per instance.
    metadata: dict = Field(default_factory=dict)
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
class LeaderboardResponse(BaseModel):
    """Response payload holding leaderboard entries and a count."""
    entries: list[LeaderboardEntry] = Field(default_factory=list)
    # NOTE(review): not derived from `entries` by this model; the producer must set it.
    total: int = 0
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
# ── Counterfactual models ───────────────────────────────────────
|
| 228 |
+
|
| 229 |
+
class CounterfactualRequest(BaseModel):
    """Request to replay from a decision point with a different action."""
    # Session to replay; falls back to the "default" session id.
    session_id: str = "default"
    # Round at which the alternative action is substituted (required).
    from_round: int
    # The action to take instead, with its optional price.
    alternative_action: ActionType
    alternative_price: Optional[float] = None
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
class CounterfactualResult(BaseModel):
    """Result of a counterfactual replay.

    Pairs the original run's outcome/price/score with the counterfactual
    run's; every field has a default.
    """
    # Original trajectory.
    original_outcome: Optional[DealOutcome] = None
    original_price: Optional[float] = None
    original_score: float = 0.0
    # Counterfactual trajectory.
    counterfactual_outcome: Optional[DealOutcome] = None
    counterfactual_price: Optional[float] = None
    counterfactual_score: float = 0.0
    # presumably the round where the two trajectories split -- confirm with the replay code
    divergence_round: int = 0
    # Untyped dict records of the replayed run; schema not visible here.
    counterfactual_history: list[dict] = Field(default_factory=list)
|
|
@@ -0,0 +1,437 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Rule-based seller opponent for BazaarBot.
|
| 2 |
+
|
| 3 |
+
The seller is a credible counterparty with configurable personality types:
|
| 4 |
+
|
| 5 |
+
Personalities:
|
| 6 |
+
- **default**: Balanced anchoring, moderate concession
|
| 7 |
+
- **deceptive**: Bluffs about demand/inventory, anchors higher, fakes urgency
|
| 8 |
+
- **impatient**: Reverses time pressure onto buyer, concedes fast but walks fast
|
| 9 |
+
- **collaborative**: Seeks fair deals, concedes to midpoint faster, builds rapport
|
| 10 |
+
|
| 11 |
+
Game-theory / poker inspired "tells":
|
| 12 |
+
- Each personality leaks observable signals that a smart agent can read
|
| 13 |
+
- Tells are noisy -- they correlate with hidden state but aren't deterministic
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
import enum
|
| 19 |
+
import math
|
| 20 |
+
import random
|
| 21 |
+
from dataclasses import dataclass, field
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class SellerPersonality(str, enum.Enum):
    """Seller personality types.

    Mixes in `str`, so each member is also a string equal to its value.
    """
    DEFAULT = "default"
    DECEPTIVE = "deceptive"
    IMPATIENT = "impatient"
    COLLABORATIVE = "collaborative"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# ── Tell system ──────────────────────────────────────────────────
|
| 32 |
+
|
| 33 |
+
@dataclass
class SellerTell:
    """Observable signal that leaks seller state.

    Inspired by poker tells -- behavioral patterns that correlate
    with hidden information (inventory, urgency, BATNA strength).

    Every field has a default, so `SellerTell()` is a neutral tell that the
    caller then overwrites field by field.
    """
    # Verbal tells -- word choices in messages
    verbal_urgency: float = 0.0        # 0-1: how desperate the language sounds
    verbal_confidence: float = 0.5     # 0-1: assertiveness of language
    verbal_deception_cue: float = 0.0  # 0-1: over-justification, filler phrases

    # Price pattern tells
    price_rounding: str = "round"      # "round" (multiples of 5/10) vs "precise"
    offer_speed: str = "normal"        # "instant", "normal", "deliberate" (thinking time proxy)
    concession_pattern: str = "steady" # "steady", "erratic", "front_loaded", "stalling"

    # Body language proxy (text-based signals)
    fidget_level: float = 0.0          # 0-1: nervousness indicators
    eye_contact: str = "steady"        # "steady", "avoidant", "intense"
    posture: str = "neutral"           # "neutral", "leaning_in", "leaning_back", "arms_crossed"

    # Meta-tells (patterns across rounds)
    repeat_phrases: int = 0            # how many times seller repeats same phrase
    topic_changes: int = 0             # diversionary tactics count
    emotional_escalation: float = 0.0  # 0-1: how emotional the seller is getting
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def _compute_tells(
    seller: "SellerState",
    buyer_offer: float | None,
    round_t: int,
    rng: random.Random,
) -> SellerTell:
    """Build the noisy, personality-dependent tell for the current round.

    The hidden "true urgency" is derived from the seller's inventory
    pressure, the fraction of rounds elapsed and how far the current ask has
    dropped from the anchor toward the reserve price. Each personality maps
    that urgency (or the round number) onto the tell fields, adding Gaussian
    noise drawn from ``rng``.

    Args:
        seller: Seller whose hidden state is being leaked.
        buyer_offer: Accepted for interface symmetry; not read here.
        round_t: Current round number.
        rng: Random source; draws happen in a fixed order per personality.

    Returns:
        A freshly built ``SellerTell``.
    """
    def jitter():
        # Zero-mean Gaussian noise with sigma 0.1.
        return rng.gauss(0, 0.1)

    def clamp01(value):
        # Bounds are the int literals 0 and 1, exactly as in the inline form.
        return max(0, min(1, value))

    kind = seller.personality

    # Base urgency from inventory pressure and elapsed time.
    urgency = seller.inventory_pressure * (round_t / max(seller.max_rounds, 1))
    # Add a share of how close the current ask is to the reserve price.
    if seller.current_offer > 0 and seller.anchor > seller.reserve_price:
        price_pressure = 1.0 - (seller.current_offer - seller.reserve_price) / (seller.anchor - seller.reserve_price)
        urgency = min(1.0, urgency + price_pressure * 0.3)

    if kind == SellerPersonality.DEFAULT:
        tell = SellerTell(
            verbal_urgency=clamp01(urgency * 0.6 + jitter()),
            verbal_confidence=clamp01(0.6 - urgency * 0.3 + jitter()),
            fidget_level=clamp01(urgency * 0.4 + jitter()),
            eye_contact="steady" if urgency < 0.5 else "avoidant",
            price_rounding="round",
            offer_speed="normal",
            concession_pattern="steady",
        )
    elif kind == SellerPersonality.DECEPTIVE:
        # Deceptive sellers INVERT their tells -- act confident when desperate.
        # The confidence draw comes first to keep the RNG sequence unchanged.
        masked_confidence = clamp01(0.3 + urgency * 0.5 + jitter())
        tell = SellerTell(
            verbal_urgency=clamp01(0.1 + jitter() * 0.15),      # suppress urgency
            verbal_confidence=masked_confidence,
            verbal_deception_cue=clamp01(urgency * 0.7 + jitter()),  # leaks!
            fidget_level=clamp01(urgency * 0.6 + jitter()),     # hard to fake
            eye_contact="intense",         # overcompensation
            posture="leaning_in",          # aggressive posture to mask weakness
            price_rounding="precise",      # precise numbers to seem authoritative
            offer_speed="instant",         # answers too fast (rehearsed)
            concession_pattern="erratic",  # jumps around to confuse
            topic_changes=max(0, int(urgency * 3 + rng.gauss(0, 0.5))),
        )
    elif kind == SellerPersonality.IMPATIENT:
        tell = SellerTell(
            verbal_urgency=clamp01(0.4 + round_t * 0.1 + jitter()),
            verbal_confidence=clamp01(0.7 - round_t * 0.05 + jitter()),
            fidget_level=clamp01(0.3 + round_t * 0.08 + jitter()),
            eye_contact="intense" if round_t < 3 else "avoidant",
            posture="arms_crossed" if round_t > 2 else "neutral",
            offer_speed="instant",
            concession_pattern="front_loaded",  # big drops early, nothing later
            emotional_escalation=clamp01(round_t * 0.12 + jitter()),
        )
    elif kind == SellerPersonality.COLLABORATIVE:
        tell = SellerTell(
            verbal_urgency=clamp01(urgency * 0.8 + jitter()),  # honest
            verbal_confidence=clamp01(0.5 + jitter()),
            verbal_deception_cue=0.0,      # no deception
            fidget_level=clamp01(urgency * 0.3 + jitter()),
            eye_contact="steady",
            posture="leaning_in",          # engaged, not aggressive
            price_rounding="round",        # transparent
            offer_speed="deliberate",      # thinks carefully
            concession_pattern="steady",
            emotional_escalation=0.0,
        )
    else:
        # Unrecognised personality: all defaults, no noise drawn.
        tell = SellerTell()

    # Meta-tell: two consecutive asks less than 1.0 apart read as stalling.
    history = seller.offer_history
    if len(history) >= 2:
        previous, latest = history[-2:]
        if abs(previous - latest) < 1.0:
            tell.repeat_phrases += 1
            tell.concession_pattern = "stalling"

    return tell
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# ── Personality-specific message templates ───────────────────────
|
| 141 |
+
|
| 142 |
+
# Message templates, keyed first by personality and then by category
# ("open", "counter", "walk", "accept", "pressure"). Templates are
# str.format patterns using {item}, {price} and, in one case, {cost}.
# Multi-line templates are wrapped in parentheses so the implicit string
# concatenation cannot be mistaken for two list entries.
_MESSAGES: dict[SellerPersonality, dict[str, list[str]]] = {
    SellerPersonality.DEFAULT: {
        "open": [
            "This {item}? Best quality. {price:.0f} rupees, final price.",
            "{price:.0f} rupees for this {item}. Very fair.",
        ],
        "counter": [
            "{price:.0f} rupees. That's my best offer.",
            "I can do {price:.0f}. Not lower.",
            "Okay, {price:.0f}. But that's the limit.",
        ],
        "walk": [
            "I have another buyer interested. Good day.",
            "Sorry, can't go that low. Maybe try next stall.",
        ],
        "accept": [
            "Done! {price:.0f} rupees. Good deal for both of us.",
        ],
        "pressure": [
            "Someone else was looking at this earlier...",
            "This is the last one I have.",
        ],
    },
    SellerPersonality.DECEPTIVE: {
        "open": [
            (
                "Ah, this {item}! I just got three offers above {price:.0f}. "
                "For you, special: {price:.0f} rupees."
            ),
            "This {item} is selling fast. {price:.0f}, and honestly I'm losing money at that.",
        ],
        "counter": [
            "My supplier charges me almost that much! {price:.0f} is rock bottom.",
            (
                "{price:.0f}... you know, I shouldn't even go this low. "
                "My cousin told me someone offered more yesterday."
            ),
            "Look, I have five people asking about this. {price:.0f}, take it or leave it.",
        ],
        "walk": [
            "Fine, fine. I have better buyers lined up anyway.",
            "You think about it. I have three others who want this.",
        ],
        "accept": [
            "You're killing me! {price:.0f}... okay, but don't tell anyone I gave this price.",
        ],
        "pressure": [
            "I'm actually about to close up for the day...",
            "Another customer was asking about this just minutes ago.",
            "My wife says I shouldn't sell below cost, but for you...",
        ],
    },
    SellerPersonality.IMPATIENT: {
        "open": [
            "{price:.0f} rupees. Quick, I'm busy.",
            "This {item}, {price:.0f}. Yes or no?",
        ],
        "counter": [
            "{price:.0f}. Decide now.",
            "Fine, {price:.0f}. Last offer. I don't have all day.",
            "{price:.0f}. Take it. I have other customers waiting.",
        ],
        "walk": [
            "Too slow. Next customer!",
            "I don't have time for this. Goodbye.",
        ],
        "accept": [
            "{price:.0f}, done. Finally.",
        ],
        "pressure": [
            "Come on, come on. What's it going to be?",
            "I've been standing here too long already.",
        ],
    },
    SellerPersonality.COLLABORATIVE: {
        "open": [
            (
                "Welcome! This {item} is lovely, isn't it? "
                "I'm asking {price:.0f} rupees. What do you think?"
            ),
            (
                "Good to see you! This {item} -- I paid {cost:.0f} for the materials. "
                "How about {price:.0f}?"
            ),
        ],
        "counter": [
            "I understand. How about {price:.0f}? That's fair for both of us.",
            "Let me think... {price:.0f} works. I need to cover my costs, you know.",
            "You drive a good bargain! {price:.0f} -- that leaves us both happy.",
        ],
        "walk": [
            "I understand, maybe next time. Come back anytime!",
            "No worries. I hope you find what you're looking for.",
        ],
        "accept": [
            "{price:.0f} rupees -- wonderful! I hope you enjoy the {item}.",
        ],
        "pressure": [
            "I'll be honest with you -- I need to sell a few more today to cover rent.",
            "Between you and me, I can be a bit flexible.",
        ],
    },
}
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def _pick_message(
    personality: SellerPersonality,
    category: str,
    rng: random.Random,
    **kwargs,
) -> str:
    """Pick a random template for ``personality``/``category`` and format it.

    Falls back to the DEFAULT personality's templates when the given
    personality has no entry for ``category``. The fallback is looked up
    only when needed, so a category that exists for the personality but not
    for DEFAULT no longer raises.

    Args:
        personality: Key into ``_MESSAGES``.
        category: Template category, e.g. "counter" or "accept".
        rng: Random source used to choose among the templates.
        **kwargs: Values for the template placeholders
            (``item``, ``price``, ``cost``).

    Returns:
        The formatted message.

    Raises:
        KeyError: If neither the personality nor DEFAULT defines
            ``category``, or if the chosen template names a placeholder
            missing from ``kwargs``.
    """
    templates = _MESSAGES[personality].get(category)
    if templates is None:
        # Lazy fallback: only touch the DEFAULT table when actually required.
        templates = _MESSAGES[SellerPersonality.DEFAULT][category]
    template = rng.choice(templates)
    return template.format(**kwargs)
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
# ── Seller state ─────────────────────────────────────────────────
|
| 251 |
+
|
| 252 |
+
@dataclass
class SellerState:
    """Mutable state and decision rules of the rule-based seller.

    ``__post_init__`` derives ``reserve_price`` from ``cost`` and applies
    personality adjustments to ``anchor`` / ``max_rounds``, so a value
    passed for ``reserve_price`` or ``current_offer`` is overwritten.
    ``offer_history`` holds the seller's own counteroffers, appended by
    ``respond``.
    """
    cost: float = 30.0
    anchor: float = 60.0
    base_concession_rate: float = 0.08
    inventory: int = 1
    initial_inventory: int = 1
    batna_probability: float = 0.1
    reserve_price: float = 0.0
    current_offer: float = 0.0
    round_number: int = 0
    max_rounds: int = 8
    buyer_capitulation_rate: float = 0.0
    offer_history: list[float] = field(default_factory=list)
    batna_triggered: bool = False
    personality: SellerPersonality = SellerPersonality.DEFAULT
    _rng: random.Random = field(default_factory=random.Random)

    # Tell tracking
    last_tell: SellerTell | None = None
    _pressure_used: int = 0
    _bluff_count: int = 0

    def __post_init__(self):
        """Derive reserve price and opening ask; apply personality tweaks."""
        self.reserve_price = self.cost * 1.05
        self.current_offer = self.anchor
        # Personality adjustments to anchor
        if self.personality == SellerPersonality.DECEPTIVE:
            self.anchor *= 1.15  # inflated anchor
            self.current_offer = self.anchor
        elif self.personality == SellerPersonality.IMPATIENT:
            self.max_rounds = max(4, self.max_rounds - 2)  # shorter patience
        elif self.personality == SellerPersonality.COLLABORATIVE:
            self.anchor *= 0.9  # lower starting anchor
            self.current_offer = self.anchor
            self.reserve_price = self.cost * 1.02  # tighter margins

    @property
    def inventory_pressure(self) -> float:
        """Remaining-to-initial inventory ratio; fixed 0.5 for single-item sellers."""
        if self.initial_inventory <= 1:
            return 0.5
        return self.inventory / self.initial_inventory

    @property
    def effective_concession_rate(self) -> float:
        """Per-round concession rate after all modifiers, capped at 0.25."""
        rate = self.base_concession_rate

        # Personality modifiers
        if self.personality == SellerPersonality.DECEPTIVE:
            rate *= 0.7  # concedes less (anchored higher)
        elif self.personality == SellerPersonality.IMPATIENT:
            rate *= 1.5  # concedes fast but walks fast
        elif self.personality == SellerPersonality.COLLABORATIVE:
            rate *= 1.3  # concedes toward fairness

        rate *= (1.0 + 0.5 * self.inventory_pressure)
        rate *= (1.0 - 0.3 * self.buyer_capitulation_rate)
        return min(rate, 0.25)

    def compute_counteroffer(self, round_t: int) -> float:
        """Return the seller's ask for ``round_t``, floored at the reserve price.

        The base ask falls linearly from the anchor with the effective
        concession rate; personalities then adjust it. May consume one draw
        from the RNG and bump ``_bluff_count`` for DECEPTIVE sellers.

        Args:
            round_t: Current round number.

        Returns:
            The ask rounded to 2 decimals, never below ``reserve_price``.
        """
        concession = self.effective_concession_rate * round_t
        offer = self.anchor * (1.0 - concession)

        # Personality-specific counteroffer adjustments
        if self.personality == SellerPersonality.DECEPTIVE and self._rng.random() < 0.3:
            # Occasionally fake a big concession then pull back next round
            if self._bluff_count < 2:
                offer *= 0.92  # looks generous
                self._bluff_count += 1
        elif self.personality == SellerPersonality.IMPATIENT:
            # Front-load concessions: big drops early, nothing later
            if round_t <= 2:
                offer *= (1.0 - 0.08 * round_t)
            # After round 2, barely move
        elif self.personality == SellerPersonality.COLLABORATIVE:
            # Blend toward the midpoint of (cost * 1.1) and the seller's own
            # previous counteroffer (offer_history stores seller asks).
            if self.offer_history:
                midpoint = (self.cost * 1.1 + self.offer_history[-1]) / 2
                offer = offer * 0.7 + midpoint * 0.3

        offer = max(offer, self.reserve_price)
        return round(offer, 2)

    def _accept(
        self, buyer_offer: float, tell: SellerTell, item: str
    ) -> tuple[str, float, SellerTell, str]:
        """Build the ("accept", ...) response for ``buyer_offer``."""
        msg = _pick_message(self.personality, "accept", self._rng,
                            item=item, price=buyer_offer)
        return ("accept", buyer_offer, tell, msg)

    def respond(
        self, buyer_offer: float | None, round_t: int
    ) -> tuple[str, float, SellerTell, str]:
        """Seller's response to a buyer action.

        Decision order: possible BATNA walk-away, acceptance when the offer
        is close enough to the current ask, probabilistic late-round
        acceptance of offers at or above reserve, otherwise a counteroffer
        (accepting outright if that counter would not exceed the offer).

        Args:
            buyer_offer: The buyer's offered price, or None if no offer.
            round_t: Current round number.

        Returns (action, price, tell, message):
            ("counter", price, tell, msg)
            ("accept", price, tell, msg)
            ("walk", 0, tell, msg)
        """
        self.round_number = round_t

        # Compute tells BEFORE decision (observable during deliberation)
        tell = _compute_tells(self, buyer_offer, round_t, self._rng)
        self.last_tell = tell

        item = "item"  # will be overridden by environment

        # Fraction of the seller's patience used up. Guarded the same way as
        # in compute_counteroffer / _compute_tells so max_rounds == 0 cannot
        # raise ZeroDivisionError.
        progress = round_t / max(self.max_rounds, 1)

        # Check BATNA
        batna_threshold = self.batna_probability * progress
        if self.personality == SellerPersonality.IMPATIENT:
            batna_threshold *= 1.5  # walks sooner
        elif self.personality == SellerPersonality.COLLABORATIVE:
            batna_threshold *= 0.3  # rarely walks

        if self._rng.random() < batna_threshold:
            if buyer_offer is None or buyer_offer < self.reserve_price * 0.9:
                self.batna_triggered = True
                msg = _pick_message(self.personality, "walk", self._rng, item=item, price=0)
                return ("walk", 0.0, tell, msg)

        # If buyer made an offer
        if buyer_offer is not None:
            # Accept if offer >= current ask (scaled by a personality threshold)
            accept_threshold = 0.98
            if self.personality == SellerPersonality.COLLABORATIVE:
                accept_threshold = 0.95  # more accepting
            elif self.personality == SellerPersonality.DECEPTIVE:
                accept_threshold = 1.0  # harder to close

            if buyer_offer >= self.current_offer * accept_threshold:
                return self._accept(buyer_offer, tell, item)

            # Time pressure acceptance
            if buyer_offer >= self.reserve_price and progress > 0.75:
                spread = self.anchor - self.reserve_price
                if spread > 0:
                    accept_prob = (buyer_offer - self.reserve_price) / spread
                else:
                    # Degenerate config (anchor <= reserve): an offer at or
                    # above reserve already meets the opening ask, so avoid
                    # dividing by zero and treat it as fully attractive.
                    accept_prob = 1.0
                accept_prob *= progress

                if self.personality == SellerPersonality.IMPATIENT:
                    accept_prob *= 1.3
                elif self.personality == SellerPersonality.DECEPTIVE:
                    accept_prob *= 0.6

                if self._rng.random() < accept_prob:
                    return self._accept(buyer_offer, tell, item)

        # Make counteroffer
        new_offer = self.compute_counteroffer(round_t)
        if buyer_offer is not None and len(self.offer_history) > 0:
            # Never drop below 95% of the midpoint with the buyer's offer.
            midpoint = (new_offer + buyer_offer) / 2
            new_offer = max(new_offer, midpoint * 0.95)
            new_offer = max(new_offer, self.reserve_price)

        # Deceptive: occasionally pull back (raise price)
        if self.personality == SellerPersonality.DECEPTIVE:
            if self._bluff_count > 0 and self._rng.random() < 0.25 and self.offer_history:
                new_offer = max(new_offer, self.offer_history[-1] * 1.03)
                new_offer = max(new_offer, self.reserve_price)
                self._bluff_count = 0

        self.current_offer = round(new_offer, 2)

        # If our computed counteroffer is at or below the buyer's offer, just accept --
        # no rational seller counters below what the buyer already offered.
        if buyer_offer is not None and self.current_offer <= buyer_offer:
            return self._accept(buyer_offer, tell, item)

        self.offer_history.append(self.current_offer)

        # Maybe add pressure message
        pressure_msg = ""
        if self._rng.random() < 0.2 + progress * 0.3:
            # Non-deceptive sellers use at most two pressure lines.
            if self.personality == SellerPersonality.DECEPTIVE or self._pressure_used < 2:
                pressure_msg = " " + _pick_message(self.personality, "pressure", self._rng,
                                                   item=item, price=self.current_offer)
                self._pressure_used += 1

        msg = _pick_message(self.personality, "counter", self._rng,
                            item=item, price=self.current_offer, cost=self.cost) + pressure_msg

        return ("counter", self.current_offer, tell, msg)

    def update_career_info(self, capitulation_rate: float):
        """Store the buyer's capitulation rate, which damps the concession rate."""
        self.buyer_capitulation_rate = capitulation_rate
|
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Task configurations and graders for BazaarBot."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from .models import DealOutcome, DealRecord, SellerPersonalityType, TaskConfig
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# ── Task Definitions ──────────────────────────────────────────────
|
| 9 |
+
|
| 10 |
+
# Registry of every task the environment can run, keyed by task name.
# Each entry's `name` field repeats its key, and the same keys are used by the
# GRADERS mapping at the bottom of this module.
#
# All tasks except "amazon_realistic" share buyer_budget=100.0 and
# seller_cost=30.0; the graders below derive the maximum possible buyer
# surplus from those two fields.
TASKS: dict[str, TaskConfig] = {
    "single_deal": TaskConfig(
        name="single_deal",
        difficulty="easy",
        description=(
            "Buyer negotiates one deal. Symmetric information. No career history. "
            "Seller concedes at moderate rate."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.08,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.3,
    ),
    "asymmetric_pressure": TaskConfig(
        name="asymmetric_pressure",
        difficulty="medium",
        description=(
            "Buyer has hidden hard deadline at round 5. Seller has hidden inventory pressure. "
            "Agent must infer seller urgency from offer velocity and close before deadline."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.06,
        # Matches the "round 5" deadline stated in the description; the
        # grade_asymmetric_pressure grader halves the score past this round.
        buyer_deadline=5,
        seller_inventory=5,
        seller_batna_probability=0.08,
        enable_career=False,
        success_threshold=0.4,
    ),
    "career_10": TaskConfig(
        name="career_10",
        difficulty="hard",
        description=(
            "Buyer plays 10 consecutive deals against same seller. Career history active. "
            "Seller adapts concession rate based on buyer's historical capitulation rate. "
            "Agent must manage reputation across episodes."
        ),
        # grade_career_10 computes rounds per episode as
        # max_steps // total_episodes, i.e. 80 // 10 = 8 here.
        max_steps=80,
        total_episodes=10,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.07,
        buyer_deadline=None,
        seller_inventory=10,
        seller_batna_probability=0.1,
        enable_career=True,
        success_threshold=0.5,
    ),
    # ── New personality-based tasks ──────────────────────────────
    "deceptive_seller": TaskConfig(
        name="deceptive_seller",
        difficulty="hard",
        description=(
            "Seller bluffs about demand, fakes urgency, anchors 15% higher. "
            "Tells leak deception cues -- verbal over-justification, fidgeting, "
            "erratic concessions. Agent must read through the bluffs."
        ),
        max_steps=10,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        # NOTE(review): the description promises a 15% higher anchor but the
        # multiplier equals the other tasks' 2.0 -- presumably the personality
        # applies the uplift elsewhere; confirm.
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.06,
        buyer_deadline=None,
        seller_inventory=3,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.35,
        seller_personality=SellerPersonalityType.DECEPTIVE,
        enable_tells=True,
    ),
    "impatient_seller": TaskConfig(
        name="impatient_seller",
        difficulty="medium",
        description=(
            "Seller concedes fast but walks fast. Shorter patience window. "
            "Agent must close quickly or risk losing the deal. "
            "Front-loaded concession pattern is the key tell."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.08,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.15,
        enable_career=False,
        success_threshold=0.3,
        seller_personality=SellerPersonalityType.IMPATIENT,
        enable_tells=True,
    ),
    "collaborative_seller": TaskConfig(
        name="collaborative_seller",
        difficulty="easy",
        description=(
            "Seller seeks fair deals, concedes toward midpoint. Lower anchor, "
            "tighter margins. Agent should reciprocate to maximize joint surplus. "
            "Tests whether agent adapts to cooperative opponents."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.10,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.02,
        enable_career=False,
        success_threshold=0.4,
        seller_personality=SellerPersonalityType.COLLABORATIVE,
        enable_tells=True,
    ),
    "read_the_tells": TaskConfig(
        name="read_the_tells",
        difficulty="expert",
        description=(
            "Deceptive seller with strong tells. Agent gets bonus score for "
            "exploiting tells -- closing below midpoint when deception cues are high "
            "indicates the agent read the bluff. Game theory meets poker."
        ),
        # NOTE(review): career_10 uses max_steps as a whole-run budget
        # (80 steps / 10 episodes). If the same convention applies here,
        # 10 steps over 5 episodes leaves only 2 steps per episode -- confirm
        # whether max_steps is meant per episode for this task.
        max_steps=10,
        total_episodes=5,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.2,
        seller_concession_rate=0.05,
        buyer_deadline=None,
        seller_inventory=5,
        seller_batna_probability=0.08,
        enable_career=True,
        success_threshold=0.45,
        seller_personality=SellerPersonalityType.DECEPTIVE,
        enable_tells=True,
    ),
    "marketplace_arena": TaskConfig(
        name="marketplace_arena",
        difficulty="expert",
        description=(
            "Multi-buyer marketplace: 2-3 buyers compete for the same item from one seller. "
            "Buyers can signal cooperation or competition. "
            "Seller plays buyers against each other. Facebook Marketplace dynamics."
        ),
        max_steps=12,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.06,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.3,
        seller_personality=SellerPersonalityType.DEFAULT,
        enable_tells=True,
        num_buyers=3,
        enable_coalition=True,
    ),
    "amazon_realistic": TaskConfig(
        name="amazon_realistic",
        difficulty="medium",
        description=(
            "Single-deal negotiation over a real Amazon listing. Item, MRP, and "
            "street price sampled per episode from data/amazon.csv. "
            "Forces generalization across product categories and price magnitudes."
        ),
        max_steps=8,
        total_episodes=1,
        # buyer_budget / seller_cost are ignored when use_real_listings=True;
        # kept here as fallbacks if the CSV is missing on the runtime.
        buyer_budget=1000.0,
        seller_cost=400.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.08,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.3,
        seller_personality=SellerPersonalityType.DEFAULT,
        enable_tells=True,
        use_real_listings=True,
    ),
}
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
# ── Graders ───────────────────────────────────────────────────────
|
| 211 |
+
|
| 212 |
+
def grade_single_deal(results: list[DealRecord], task: TaskConfig) -> float:
    """Score one negotiation as the buyer's share of the available surplus.

    Only the first record in ``results`` is graded. The score is
    ``(buyer_budget - agreed_price) / (buyer_budget - seller_cost)`` clamped
    to ``[0.0, 1.0]``.

    Returns 0.0 when there are no results, when the first record is not a
    closed deal with a price, or when the task leaves no positive surplus
    (``buyer_budget <= seller_cost``).
    """
    if not results:
        return 0.0

    first = results[0]
    if first.outcome != DealOutcome.DEAL or first.agreed_price is None:
        return 0.0

    captured = task.buyer_budget - first.agreed_price
    available = task.buyer_budget - task.seller_cost
    if available <= 0:
        return 0.0

    share = captured / available
    return max(0.0, min(1.0, share))
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def grade_asymmetric_pressure(results: list[DealRecord], task: TaskConfig) -> float:
    """Score a deadline-constrained negotiation.

    Only the first record in ``results`` is graded. The buyer's surplus share
    (floored at 0.0) is multiplied by 1.0 when the deal closed within the
    deadline and by 0.5 otherwise; the product is clamped to ``[0.0, 1.0]``.
    A falsy ``task.buyer_deadline`` (``None`` or 0) falls back to round 5.

    Returns 0.0 when there are no results, when the first record is a walk or
    an expiry, or when it carries no agreed price.
    """
    if not results:
        return 0.0

    first = results[0]
    if first.outcome == DealOutcome.WALK or first.outcome == DealOutcome.EXPIRED:
        return 0.0
    if first.agreed_price is None:
        return 0.0

    captured = task.buyer_budget - first.agreed_price
    available = task.buyer_budget - task.seller_cost
    if available > 0:
        surplus_share = max(0.0, captured / available)
    else:
        surplus_share = 0.0

    cutoff = task.buyer_deadline or 5
    on_time_factor = 1.0 if first.rounds_taken <= cutoff else 0.5

    return max(0.0, min(1.0, surplus_share * on_time_factor))
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def grade_career_10(results: list[DealRecord], task: TaskConfig) -> float:
    """Score a multi-episode career as the mean efficiency-weighted surplus.

    Each closed deal contributes ``norm_surplus * efficiency`` where
    ``norm_surplus`` is the buyer's surplus share (floored at 0.0) and
    ``efficiency = max(0, 1 - 0.3 * rounds_taken / rounds_per_episode)``.
    Records that are not a closed deal with a price contribute 0.0. The mean
    over all records is clamped to ``[0.0, 1.0]``.

    Args:
        results: One record per episode played.
        task: Task configuration; reads ``max_steps``, ``total_episodes``,
            ``buyer_budget`` and ``seller_cost``.

    Returns:
        The career score in ``[0.0, 1.0]``; 0.0 for an empty ``results``.
    """
    if not results:
        return 0.0

    # Guard both divisions: a config with total_episodes == 0, or with
    # max_steps < total_episodes, would otherwise raise ZeroDivisionError
    # (first in the floor division, then in the efficiency ratio).
    episodes = max(1, task.total_episodes)
    rounds_per_ep = max(1, task.max_steps // episodes)

    # Loop-invariant: the surplus ceiling depends only on the task.
    max_surplus = task.buyer_budget - task.seller_cost

    weighted_scores = []
    for deal in results:
        if deal.agreed_price is None or deal.outcome != DealOutcome.DEAL:
            weighted_scores.append(0.0)
            continue

        surplus = task.buyer_budget - deal.agreed_price
        norm_surplus = max(0.0, surplus / max_surplus) if max_surplus > 0 else 0.0

        # Slower deals are discounted by up to 30% per full episode budget.
        efficiency = max(0.0, 1.0 - (deal.rounds_taken / rounds_per_ep) * 0.3)
        weighted_scores.append(norm_surplus * efficiency)

    score = sum(weighted_scores) / max(len(weighted_scores), 1)
    return max(0.0, min(1.0, score))
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
def grade_personality_task(results: list[DealRecord], task: TaskConfig) -> float:
    """Generic grader for personality tasks: mean surplus share over episodes.

    Every record is scored like ``grade_single_deal`` -- the buyer's share of
    ``buyer_budget - seller_cost`` -- except that the per-record share is only
    floored at 0.0; the upper clamp to 1.0 is applied to the mean. Records
    that are not a closed deal with a price count as 0.0.

    Returns 0.0 for an empty ``results``.
    """
    if not results:
        return 0.0

    def _share(deal) -> float:
        # Surplus share of a single record, 0.0 when no deal was closed.
        if deal.outcome != DealOutcome.DEAL or deal.agreed_price is None:
            return 0.0
        captured = task.buyer_budget - deal.agreed_price
        available = task.buyer_budget - task.seller_cost
        if available > 0:
            return max(0.0, captured / available)
        return 0.0

    per_deal = [_share(deal) for deal in results]
    mean_share = sum(per_deal) / max(len(per_deal), 1)
    return max(0.0, min(1.0, mean_share))
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
def grade_read_the_tells(results: list[DealRecord], task: TaskConfig) -> float:
    """Mean surplus share with a bonus for closing below the midpoint.

    Each closed deal earns its surplus share (floored at 0.0). When the
    agreed price is below the midpoint of ``seller_cost`` and
    ``buyer_budget``, up to 0.15 is added in proportion to how far below the
    midpoint the price landed, and the per-deal score is capped at 1.0.
    Records that are not a closed deal with a price count as 0.0.

    Args:
        results: One record per episode played.
        task: Task configuration; reads ``buyer_budget`` and ``seller_cost``.

    Returns:
        The mean per-deal score clamped to ``[0.0, 1.0]``; 0.0 for an empty
        ``results``.
    """
    if not results:
        return 0.0

    max_surplus = task.buyer_budget - task.seller_cost
    midpoint = (task.buyer_budget + task.seller_cost) / 2
    # Distance from seller cost up to the midpoint; the bonus is scaled by it.
    # It is zero when budget == cost (would divide by zero) and negative when
    # budget < cost (would turn the bonus into a penalty), so the bonus is
    # skipped in both degenerate cases.
    bonus_span = midpoint - task.seller_cost

    scores = []
    for deal in results:
        if deal.agreed_price is None or deal.outcome != DealOutcome.DEAL:
            scores.append(0.0)
            continue

        surplus = task.buyer_budget - deal.agreed_price
        norm = max(0.0, surplus / max_surplus) if max_surplus > 0 else 0.0

        # Bonus for closing below midpoint (reading the bluff)
        if bonus_span > 0 and deal.agreed_price < midpoint:
            bluff_bonus = 0.15 * ((midpoint - deal.agreed_price) / bonus_span)
            norm = min(1.0, norm + bluff_bonus)

        scores.append(norm)

    return max(0.0, min(1.0, sum(scores) / max(len(scores), 1)))
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
def grade_amazon_realistic(results: list[DealRecord], task: TaskConfig) -> float:
    """Grade a real-listing task from the record's own normalized surplus.

    The task-level ``buyer_budget`` / ``seller_cost`` are not used here; the
    score is the first record's ``normalized_surplus`` (which uses the
    seller's episode cost, not the task's default cost) clamped to
    ``[0.0, 1.0]``.

    Returns 0.0 when there are no results or the first record is not a deal.
    """
    if not results:
        return 0.0

    first = results[0]
    if first.outcome != DealOutcome.DEAL:
        return 0.0

    capped = min(1.0, first.normalized_surplus)
    return max(0.0, capped)
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
# Maps each task name in TASKS to the grader used to score its results.
# Every grader takes (results, task) and returns a float in [0.0, 1.0].
# The three single-personality tasks and marketplace_arena share the generic
# per-episode-mean grader.
GRADERS = {
    "single_deal": grade_single_deal,
    "asymmetric_pressure": grade_asymmetric_pressure,
    "career_10": grade_career_10,
    "deceptive_seller": grade_personality_task,
    "impatient_seller": grade_personality_task,
    "collaborative_seller": grade_personality_task,
    "read_the_tells": grade_read_the_tells,
    "marketplace_arena": grade_personality_task,
    "amazon_realistic": grade_amazon_realistic,
}
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .extractor import TellExtractor
|
| 2 |
+
|
| 3 |
+
__all__ = ["TellExtractor"]
|
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Evaluate the NLP tell extractor against Chicago HAI human-labeled rows.
|
| 2 |
+
|
| 3 |
+
Compares ministral-3:3b zero-shot extraction to Chicago HAI ground-truth labels:
|
| 4 |
+
- firm_soft → verbal_confidence (binary: f=high, s=low)
|
| 5 |
+
- external_incentive=y → verbal_deception_cue (claim of outside pressure)
|
| 6 |
+
- category → loosely informs urgency/confidence
|
| 7 |
+
|
| 8 |
+
Also runs a rule-based control baseline (no LLM) for comparison.
|
| 9 |
+
|
| 10 |
+
Logs to runs/{ts}_extractor_eval/ via RunLogger.
|
| 11 |
+
|
| 12 |
+
Usage:
|
| 13 |
+
PYTHONPATH=. .venv/bin/python nlp/eval_extractor.py [--n 500] [--model ministral-3:3b]
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import argparse
|
| 17 |
+
import json
|
| 18 |
+
import pathlib
|
| 19 |
+
import time
|
| 20 |
+
from collections import defaultdict
|
| 21 |
+
|
| 22 |
+
from nlp.extractor import TellExtractor, _condition_from_text, DEFAULT_TELL
|
| 23 |
+
from utils.run_logger import RunLogger
|
| 24 |
+
|
| 25 |
+
LABELED_ROWS = pathlib.Path("nlp/data/chicago_hai_bargaining.jsonl")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def load_labeled(min_len: int = 10, path=None) -> list[dict]:
    """Load Chicago HAI rows that have at least one human label.

    Args:
        min_len: Minimum utterance length (in characters) for a row to be kept.
        path: Optional JSONL file to read instead of the module-level
            ``LABELED_ROWS`` default; one JSON object per line.

    Returns:
        The parsed rows, in file order, that carry a truthy ``category``,
        ``firm_soft`` or ``external_incentive`` and whose ``utterance`` is at
        least ``min_len`` characters long.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    source = LABELED_ROWS if path is None else path
    rows = []
    # Explicit UTF-8: the platform default encoding is not guaranteed to be
    # able to decode the file's non-ASCII text.
    with open(source, encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            r = json.loads(line)
            # A row missing a label key is treated as unlabeled for that key.
            has_label = bool(
                r.get("category") or r.get("firm_soft") or r.get("external_incentive")
            )
            utterance = r.get("utterance") or ""
            if has_label and len(utterance) >= min_len:
                rows.append(r)
    return rows
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def rule_based_predict(utterance: str) -> dict:
    """Control baseline: keyword condition rules only, defaults elsewhere.

    Returns a copy of ``DEFAULT_TELL`` whose three condition fields are
    overwritten with the result of ``_condition_from_text(utterance)``.
    """
    score, wear, grade = _condition_from_text(utterance)
    return {
        **DEFAULT_TELL,
        "condition_score": score,
        "depreciation_score": wear,
        "condition_label": grade,
    }
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def score_row(predicted: dict, gold: dict, row: dict) -> dict:
    """Score one prediction against a row's Chicago HAI labels.

    Always reports the absolute error of the three verbal signals between
    ``predicted`` and ``gold``. Two further entries are added only when the
    row carries the corresponding human label:

    - ``firm_correct``: 1 when the prediction's firm/soft call (confidence
      >= 0.5 means firm) matches ``row["firm_soft"] == "f"``, else 0.
    - ``deception_flagged``: for rows with ``external_incentive == "y"``,
      1 when the predicted deception cue is >= 0.4, else 0.
    """
    scores = {}
    for short_name in ("urgency", "confidence", "deception"):
        field = "verbal_deception_cue" if short_name == "deception" else f"verbal_{short_name}"
        scores[f"abs_err_{short_name}"] = abs(predicted[field] - gold[field])

    # Firm/soft accuracy is only defined when the row has a firm_soft label.
    firm_label = row["firm_soft"]
    if firm_label:
        labeled_firm = firm_label == "f"
        predicted_firm = predicted["verbal_confidence"] >= 0.5
        scores["firm_correct"] = int(labeled_firm == predicted_firm)

    # Recall on external-incentive rows: did the extractor flag deception?
    if row["external_incentive"] == "y":
        scores["deception_flagged"] = int(predicted["verbal_deception_cue"] >= 0.4)

    return scores
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def aggregate(per_row: list[dict]) -> dict:
    """Average every numeric metric across the per-row score dicts.

    Values that are ``int`` or ``float`` (but not ``bool``) are grouped by
    key; each group is reported as ``mean_<key>`` rounded to 4 decimals.
    Keys missing from some rows are averaged over the rows that have them.
    """
    buckets: dict = {}
    for record in per_row:
        for key, value in record.items():
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                continue
            buckets.setdefault(key, []).append(value)

    summary = {}
    for key, values in buckets.items():
        summary[f"mean_{key}"] = round(sum(values) / len(values), 4)
    return summary
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def run_pass(rows: list[dict], predict_fn, name: str, log) -> dict:
    """Run one extraction pass over labeled rows and summarize its scores.

    Args:
        rows: Labeled rows; each must provide ``utterance`` and
            ``tell_supervision``.
        predict_fn: Callable taking an utterance string and returning a
            prediction dict accepted by ``score_row``.
        name: Label for this pass, used in console output and logged metrics.
        log: Run logger; ``log.metric(dict)`` is called once per scored row.

    Returns:
        The ``aggregate`` summary of the scored rows plus ``n`` (rows scored)
        and ``elapsed_s`` (wall time rounded to 0.1 s).
    """
    print(f"\n[{name}] running on {len(rows)} rows ...")
    per_row = []
    # perf_counter is monotonic and high-resolution, unlike time.time().
    t0 = time.perf_counter()
    for i, row in enumerate(rows):
        try:
            pred = predict_fn(row["utterance"])
        except Exception as e:
            # Best-effort: a failed prediction is reported and left out of
            # the summary rather than aborting the whole pass.
            print(f" ! row {i} failed: {e}")
            continue

        scored = score_row(pred, row["tell_supervision"], row)
        log.metric({
            **scored,
            "pass": name,
            "row_idx": i,
            "utterance_preview": row["utterance"][:80],
        })
        per_row.append(scored)

        if (i + 1) % 50 == 0:
            elapsed = time.perf_counter() - t0
            # A very fast pass (e.g. the rule-based baseline) can finish 50
            # rows within timer resolution; avoid dividing by zero.
            rate = (i + 1) / elapsed if elapsed > 0 else float("inf")
            eta = (len(rows) - i - 1) / rate
            print(f" [{i+1}/{len(rows)}] {rate:.2f} rows/s ETA {eta:.0f}s")

    elapsed = time.perf_counter() - t0
    print(f"[{name}] done in {elapsed:.1f}s")
    summary = aggregate(per_row)
    summary["n"] = len(per_row)
    summary["elapsed_s"] = round(elapsed, 1)
    return summary
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def main():
    """CLI entry point: compare the rule-based baseline with the LLM extractor.

    Loads the labeled rows, optionally down-samples them to ``--n`` with a
    seeded RNG, runs both passes under a ``RunLogger("extractor_eval")`` run,
    logs the comparison (including per-metric deltas, model minus baseline)
    and prints it as JSON.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--n", type=int, default=500, help="Cap on labeled rows")
    cli.add_argument("--model", type=str, default="ministral-3:3b")
    cli.add_argument("--seed", type=int, default=42)
    args = cli.parse_args()

    import random
    random.seed(args.seed)

    rows = load_labeled()
    print(f"Loaded {len(rows)} labeled rows from Chicago HAI")
    if args.n and args.n < len(rows):
        rows = random.sample(rows, args.n)
        print(f"Sampled {args.n} rows for eval")

    extractor = TellExtractor(model=args.model)

    with RunLogger("extractor_eval") as log:
        run_config = {
            "model": args.model,
            "n_rows": len(rows),
            "seed": args.seed,
            "labeled_source": "chicago_hai_bargaining.jsonl",
        }
        log.config(run_config)

        rule_summary = run_pass(rows, rule_based_predict, "rule_based", log)
        ministral_summary = run_pass(rows, extractor.extract, args.model, log)

        # Only metrics reported by both passes get a delta.
        deltas = {}
        for key in rule_summary:
            if key.startswith("mean_") and key in ministral_summary:
                delta_key = key.replace("mean_", "delta_")
                deltas[delta_key] = ministral_summary.get(key, 0) - rule_summary.get(key, 0)

        comparison = {
            "rule_based": rule_summary,
            args.model: ministral_summary,
            "deltas": deltas,
        }
        log.summary(comparison)

        print("\n=== SUMMARY ===")
        print(json.dumps(comparison, indent=2))
        print(f"\nFull logs: {log.dir}")
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
if __name__ == "__main__":
|
| 162 |
+
main()
|
|
@@ -0,0 +1,326 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""NLP Tell Extractor — reads seller utterances, returns TellObservation.
|
| 2 |
+
|
| 3 |
+
Uses a local Ollama model (default: ministral-3:3b) to extract structured signals
|
| 4 |
+
from free-text seller messages. Output schema matches TellObservation in
|
| 5 |
+
bazaarbot_env/models.py — same fields, same ranges.
|
| 6 |
+
|
| 7 |
+
The extractor runs as a post-processing step after the seller speaks. For the
|
| 8 |
+
rule-based seller it's a cross-check; for the LLM seller it's the primary
|
| 9 |
+
tell source.
|
| 10 |
+
|
| 11 |
+
Usage:
|
| 12 |
+
from nlp.extractor import TellExtractor
|
| 13 |
+
extractor = TellExtractor()
|
| 14 |
+
tells = extractor.extract("bhai last price hai, kal se badhega", history=[...])
|
| 15 |
+
|
| 16 |
+
Standalone test:
|
| 17 |
+
python nlp/extractor.py
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
from __future__ import annotations
|
| 21 |
+
|
| 22 |
+
import json
|
| 23 |
+
import re
|
| 24 |
+
import textwrap
|
| 25 |
+
from typing import Optional
|
| 26 |
+
import requests
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
OLLAMA_URL = "http://localhost:11434/api/generate"
|
| 30 |
+
DEFAULT_MODEL = "ministral-3:3b"
|
| 31 |
+
|
| 32 |
+
# ── Condition vocabulary ──────────────────────────────────────────
|
| 33 |
+
|
| 34 |
+
# eBay standardized grades → (condition_score, depreciation_score, label)
|
| 35 |
+
CONDITION_GRADES: list[tuple[list[str], float, float, str]] = [
|
| 36 |
+
(
|
| 37 |
+
["new", "sealed", "mint", "mib", "mint in box", "brand new", "unused",
|
| 38 |
+
"box band", "seal pack", "sealed pack", "never opened", "factory sealed"],
|
| 39 |
+
1.0, 0.0, "new",
|
| 40 |
+
),
|
| 41 |
+
(
|
| 42 |
+
["like new", "open box", "barely used", "3 months", "6 months",
|
| 43 |
+
"thoda use", "thoda sa use", "bilkul sahi", "almost new", "excellent"],
|
| 44 |
+
0.85, 0.10, "like_new",
|
| 45 |
+
),
|
| 46 |
+
(
|
| 47 |
+
["very good", "vgc", "minor scratch", "ek chhota scratch", "small scratch",
|
| 48 |
+
"light scratch", "minor wear", "slight", "good condition"],
|
| 49 |
+
0.70, 0.25, "very_good",
|
| 50 |
+
),
|
| 51 |
+
(
|
| 52 |
+
["good", "guc", "some scratches", "few scratches", "normal wear",
|
| 53 |
+
"works perfectly", "fully functional", "theek kaam", "sahi kaam"],
|
| 54 |
+
0.55, 0.40, "good",
|
| 55 |
+
),
|
| 56 |
+
(
|
| 57 |
+
["acceptable", "heavy scratch", "dent", "battery low", "battery thodi kam",
|
| 58 |
+
"screen crack", "needs repair", "rough", "worn", "purana hai"],
|
| 59 |
+
0.35, 0.60, "acceptable",
|
| 60 |
+
),
|
| 61 |
+
(
|
| 62 |
+
["for parts", "broken", "dead", "not working", "kharab", "kaam nahi karta",
|
| 63 |
+
"damaged", "junk"],
|
| 64 |
+
0.10, 0.90, "junk",
|
| 65 |
+
),
|
| 66 |
+
]
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _condition_from_text(text: str) -> tuple[float, float, str]:
|
| 70 |
+
"""Rule-based fast pass for condition signals before LLM extraction."""
|
| 71 |
+
lower = text.lower()
|
| 72 |
+
for keywords, score, dep, label in CONDITION_GRADES:
|
| 73 |
+
for kw in keywords:
|
| 74 |
+
if kw in lower:
|
| 75 |
+
return score, dep, label
|
| 76 |
+
return 1.0, 0.0, "unknown"
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# ── Hinglish few-shot examples for the extractor prompt ──────────
|
| 80 |
+
|
| 81 |
+
HINGLISH_FEW_SHOTS = """
|
| 82 |
+
Utterance: "bhai last price hai, kal se price badhega"
|
| 83 |
+
Tells: {"verbal_urgency": 0.75, "verbal_confidence": 0.6, "verbal_deception_cue": 0.5, "offer_speed": "instant", "concession_pattern": "stalling", "emotional_escalation": 0.3, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}
|
| 84 |
+
|
| 85 |
+
Utterance: "ek chhota sa scratch hai screen pe, baaki sab bilkul theek hai"
|
| 86 |
+
Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.6, "verbal_deception_cue": 0.2, "offer_speed": "deliberate", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 0.7, "depreciation_score": 0.25, "condition_label": "very_good"}
|
| 87 |
+
|
| 88 |
+
Utterance: "abhi teen aur log dekh rahe hain, aaj hi lena padega"
|
| 89 |
+
Tells: {"verbal_urgency": 0.8, "verbal_confidence": 0.7, "verbal_deception_cue": 0.75, "offer_speed": "instant", "concession_pattern": "stalling", "emotional_escalation": 0.4, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}
|
| 90 |
+
|
| 91 |
+
Utterance: "battery thodi kam hai, 79% hai, magar charger ke saath deta hoon"
|
| 92 |
+
Tells: {"verbal_urgency": 0.2, "verbal_confidence": 0.5, "verbal_deception_cue": 0.1, "offer_speed": "deliberate", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 0.35, "depreciation_score": 0.6, "condition_label": "acceptable"}
|
| 93 |
+
|
| 94 |
+
Utterance: "box band hai, seal packed, maine khola bhi nahi"
|
| 95 |
+
Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.8, "verbal_deception_cue": 0.0, "offer_speed": "normal", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "new"}
|
| 96 |
+
""".strip()
|
| 97 |
+
|
| 98 |
+
# Chicago HAI examples (English formal negotiations)
|
| 99 |
+
CHICAGO_FEW_SHOTS = """
|
| 100 |
+
Utterance: "I have another buyer coming in an hour, this is my final offer"
|
| 101 |
+
Tells: {"verbal_urgency": 0.7, "verbal_confidence": 0.75, "verbal_deception_cue": 0.65, "offer_speed": "instant", "concession_pattern": "stalling", "emotional_escalation": 0.3, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}
|
| 102 |
+
|
| 103 |
+
Utterance: "Minor scratches on the back, fully functional, battery health 81%"
|
| 104 |
+
Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.6, "verbal_deception_cue": 0.15, "offer_speed": "deliberate", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 0.55, "depreciation_score": 0.4, "condition_label": "good"}
|
| 105 |
+
|
| 106 |
+
Utterance: "Okay fine, I can do 4500, but that is absolutely the lowest I'll go"
|
| 107 |
+
Tells: {"verbal_urgency": 0.5, "verbal_confidence": 0.55, "verbal_deception_cue": 0.3, "offer_speed": "deliberate", "concession_pattern": "front_loaded", "emotional_escalation": 0.35, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}
|
| 108 |
+
|
| 109 |
+
Utterance: "MIB, never opened, still has the plastic wrap on it"
|
| 110 |
+
Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.85, "verbal_deception_cue": 0.0, "offer_speed": "normal", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "new"}
|
| 111 |
+
""".strip()
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
EXTRACTION_SYSTEM_PROMPT = textwrap.dedent(f"""\
|
| 115 |
+
You extract structured negotiation signals from a seller's message.
|
| 116 |
+
Output ONLY a single valid JSON object. No prose, no markdown, no explanation.
|
| 117 |
+
|
| 118 |
+
Output schema (all fields required):
|
| 119 |
+
{{
|
| 120 |
+
"verbal_urgency": <0.0β1.0, how desperate/pressured the seller sounds>,
|
| 121 |
+
"verbal_confidence": <0.0β1.0, how assertive/firm the seller sounds>,
|
| 122 |
+
"verbal_deception_cue": <0.0β1.0, signs of bluffing: social proof claims, fake scarcity, over-justification>,
|
| 123 |
+
"offer_speed": <"instant"|"normal"|"deliberate">,
|
| 124 |
+
"concession_pattern": <"steady"|"front_loaded"|"stalling"|"erratic">,
|
| 125 |
+
"emotional_escalation": <0.0β1.0, how emotionally charged the message is>,
|
| 126 |
+
"condition_score": <0.0β1.0, item condition from 0=junk to 1=mint. 1.0 if no condition info>,
|
| 127 |
+
"depreciation_score": <0.0β1.0, wear/damage level. 0.0 if no condition info>,
|
| 128 |
+
"condition_label": <"new"|"like_new"|"very_good"|"good"|"acceptable"|"junk"|"unknown">
|
| 129 |
+
}}
|
| 130 |
+
|
| 131 |
+
Calibration rules:
|
| 132 |
+
- Social proof ("another buyer", "3 log dekh rahe", "bahut demand hai") β verbal_deception_cue β₯ 0.6
|
| 133 |
+
- "Final price", "last offer", "bilkul nahi jaaunga" β verbal_confidence β₯ 0.7
|
| 134 |
+
- Time pressure claims ("kal se badhega", "aaj hi") β verbal_urgency β₯ 0.65
|
| 135 |
+
- Condition disclosures lower condition_score from 1.0; no disclosure = keep 1.0
|
| 136 |
+
- "Firm" language = verbal_confidence β₯ 0.75; "Soft/flexible" = β€ 0.35
|
| 137 |
+
|
| 138 |
+
Examples (Hinglish):
|
| 139 |
+
{HINGLISH_FEW_SHOTS}
|
| 140 |
+
|
| 141 |
+
Examples (English):
|
| 142 |
+
{CHICAGO_FEW_SHOTS}
|
| 143 |
+
""")
|
| 144 |
+
|
| 145 |
+
# Fallback tell values. _parse_extraction returns a copy of this dict when
# the model output is not valid JSON, and otherwise starts from it so that
# every field is present. Its keys are the fields listed in the output
# schema of EXTRACTION_SYSTEM_PROMPT.
DEFAULT_TELL = {
    "verbal_urgency": 0.2,
    "verbal_confidence": 0.5,
    "verbal_deception_cue": 0.0,
    "offer_speed": "normal",
    "concession_pattern": "steady",
    "emotional_escalation": 0.0,
    "condition_score": 1.0,
    "depreciation_score": 0.0,
    "condition_label": "unknown",
}

# Allowed values for the categorical fields. _parse_extraction replaces any
# other value with "normal", "steady" and "unknown" respectively.
VALID_OFFER_SPEEDS = {"instant", "normal", "deliberate"}
VALID_CONCESSION_PATTERNS = {"steady", "front_loaded", "stalling", "erratic"}
VALID_CONDITION_LABELS = {"new", "like_new", "very_good", "good", "acceptable", "junk", "unknown"}
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def _clamp(v, lo=0.0, hi=1.0) -> float:
|
| 163 |
+
try:
|
| 164 |
+
return max(lo, min(hi, float(v)))
|
| 165 |
+
except (TypeError, ValueError):
|
| 166 |
+
return (lo + hi) / 2
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def _parse_extraction(raw: str) -> dict:
|
| 170 |
+
"""Parse JSON from LLM output, clamp ranges, fill missing fields."""
|
| 171 |
+
s = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
|
| 172 |
+
if "```" in s:
|
| 173 |
+
parts = s.split("```")
|
| 174 |
+
s = parts[1].lstrip("json").strip() if len(parts) >= 2 else s
|
| 175 |
+
start, end = s.find("{"), s.rfind("}") + 1
|
| 176 |
+
if start >= 0 and end > start:
|
| 177 |
+
s = s[start:end]
|
| 178 |
+
try:
|
| 179 |
+
parsed = json.loads(s)
|
| 180 |
+
except Exception:
|
| 181 |
+
return dict(DEFAULT_TELL)
|
| 182 |
+
|
| 183 |
+
out = dict(DEFAULT_TELL)
|
| 184 |
+
out["verbal_urgency"] = _clamp(parsed.get("verbal_urgency", out["verbal_urgency"]))
|
| 185 |
+
out["verbal_confidence"] = _clamp(parsed.get("verbal_confidence", out["verbal_confidence"]))
|
| 186 |
+
out["verbal_deception_cue"] = _clamp(parsed.get("verbal_deception_cue", out["verbal_deception_cue"]))
|
| 187 |
+
out["emotional_escalation"] = _clamp(parsed.get("emotional_escalation", out["emotional_escalation"]))
|
| 188 |
+
out["condition_score"] = _clamp(parsed.get("condition_score", out["condition_score"]))
|
| 189 |
+
out["depreciation_score"] = _clamp(parsed.get("depreciation_score", out["depreciation_score"]))
|
| 190 |
+
|
| 191 |
+
speed = parsed.get("offer_speed", "normal")
|
| 192 |
+
out["offer_speed"] = speed if speed in VALID_OFFER_SPEEDS else "normal"
|
| 193 |
+
|
| 194 |
+
pattern = parsed.get("concession_pattern", "steady")
|
| 195 |
+
out["concession_pattern"] = pattern if pattern in VALID_CONCESSION_PATTERNS else "steady"
|
| 196 |
+
|
| 197 |
+
label = parsed.get("condition_label", "unknown")
|
| 198 |
+
out["condition_label"] = label if label in VALID_CONDITION_LABELS else "unknown"
|
| 199 |
+
|
| 200 |
+
return out
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
class TellExtractor:
    """Extracts TellObservation fields from seller free text via Ollama."""

    def __init__(
        self,
        model: str = DEFAULT_MODEL,
        ollama_url: str = OLLAMA_URL,
        timeout: float = 120,
    ):
        """Store connection settings; no network call is made here.

        Args:
            model: Model name sent in the request payload.
            ollama_url: URL the generation request is POSTed to.
            timeout: Seconds to wait for the HTTP response.
        """
        self.model = model
        self.ollama_url = ollama_url
        self.timeout = timeout

    @staticmethod
    def _rule_based_tell(cond_score, dep_score, cond_label) -> dict:
        """Return default tells with the rule-based condition fields filled in."""
        result = dict(DEFAULT_TELL)
        result["condition_score"] = cond_score
        result["depreciation_score"] = dep_score
        result["condition_label"] = cond_label
        return result

    def _call_ollama(self, prompt: str) -> str:
        """POST ``prompt`` to the model and return its text, or "" on any failure."""
        payload = {
            "model": self.model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.1, "num_predict": 256},
        }
        try:
            resp = requests.post(self.ollama_url, json=payload, timeout=self.timeout)
            resp.raise_for_status()
            text = resp.json().get("response", "")
        except Exception as e:
            # Best-effort boundary: callers fall back to rule-based defaults.
            print(f"[extractor] Ollama call failed: {e}")
            return ""
        # A non-string payload cannot be parsed downstream; treat it as empty.
        return text if isinstance(text, str) else ""

    def extract(
        self,
        message: str,
        history: Optional[list[str]] = None,
        fast: bool = False,
    ) -> dict:
        """Extract tell signals from a seller utterance.

        Args:
            message: the seller's current utterance
            history: last N turns as strings (for context); only the final
                three entries are included in the prompt
            fast: if True, skip LLM and use rule-based condition extraction only
                (useful during GRPO rollouts where latency matters)

        Returns:
            dict matching TellObservation field names
        """
        cond_score, dep_score, cond_label = _condition_from_text(message)

        # Fast path: rule-based condition extraction, defaults for everything else.
        if fast:
            return self._rule_based_tell(cond_score, dep_score, cond_label)

        history_block = ""
        if history:
            recent = history[-3:]
            history_block = "\nRecent conversation:\n" + "\n".join(str(t) for t in recent) + "\n"

        user_prompt = (
            f"{history_block}"
            f'\nSeller says: "{message}"\n\n'
            "Extract tells as JSON:"
        )

        full_prompt = EXTRACTION_SYSTEM_PROMPT + "\n\n" + user_prompt
        raw = self._call_ollama(full_prompt)

        # Empty reply (call failed or model returned nothing): same result as
        # the fast path.
        if not raw:
            return self._rule_based_tell(cond_score, dep_score, cond_label)

        result = _parse_extraction(raw)

        # Rule-based condition always wins over LLM for condition fields.
        # Keyword matching on explicit condition phrases ("minor scratches",
        # "box band", "MIB") is more reliable than LLM inference for this
        # narrow vocabulary. LLM is better at urgency/deception where context
        # and tone matter more than keyword lookup.
        if cond_label != "unknown":
            result["condition_score"] = cond_score
            result["depreciation_score"] = dep_score
            result["condition_label"] = cond_label

        return result

    def batch_extract(self, messages: list[str], fast: bool = False) -> list[dict]:
        """Run :meth:`extract` on each message in order (no shared history).

        Args:
            messages: utterances to process independently
            fast: forwarded to :meth:`extract` for every message
        """
        return [self.extract(m, fast=fast) for m in messages]
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
# -- Standalone test ---------------------------------------------------

# Sample seller utterances covering the tell categories; used by the demo
# run below.
TEST_UTTERANCES = [
    # Hinglish urgency + social proof (deceptive)
    "bhai last price hai, abhi teen aur log dekh rahe hain",
    # Hinglish condition disclosure
    "ek chhota sa scratch hai screen pe, battery 81% hai, baaki sab theek",
    # Hinglish sealed
    "box band hai, seal packed, maine kabhi khola nahi",
    # English deceptive pressure
    "I have another buyer coming in an hour, this is my absolute final offer",
    # English condition
    "Minor scratches on the back panel, fully functional, screen is perfect",
    # English collaborative
    "Look, I'll be honest with you \u2014 I paid 8000 for it, I just need 6500 to break even",
    # eBay lingo
    "MIB, never opened, still has factory seal",
    # Impatient
    "6000. Yes or no. I don't have all day.",
]


def _run_demo() -> None:
    """Print the extracted tells for every entry in TEST_UTTERANCES."""
    extractor = TellExtractor()
    print(f"Using model: {extractor.model}\n")
    print("=" * 60)

    for utt in TEST_UTTERANCES:
        print(f"Utterance: {utt}")
        tells = extractor.extract(utt)
        print(f" urgency={tells['verbal_urgency']:.2f} "
              f"confidence={tells['verbal_confidence']:.2f} "
              f"deception={tells['verbal_deception_cue']:.2f} "
              f"speed={tells['offer_speed']}")
        print(f" condition={tells['condition_label']} "
              f"score={tells['condition_score']:.2f} "
              f"depreciation={tells['depreciation_score']:.2f}")
        print()


if __name__ == "__main__":
    _run_demo()
|
|
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Fetch and cache negotiation datasets used for NLP extractor supervision.
|
| 2 |
+
|
| 3 |
+
Datasets:
|
| 4 |
+
1. stanfordnlp/craigslist_bargains — per-turn intent labels (init-price/accept/reject)
|
| 5 |
+
2. ChicagoHAI/language-of-bargaining — per-turn bargaining act + Firm/Soft + External Incentive
|
| 6 |
+
3. casino β multi-issue strategy annotations
|
| 7 |
+
|
| 8 |
+
Run:
|
| 9 |
+
python nlp/fetch_datasets.py
|
| 10 |
+
|
| 11 |
+
Outputs written to nlp/data/:
|
| 12 |
+
craigslist_bargains.jsonl
|
| 13 |
+
chicago_hai_bargaining.jsonl
|
| 14 |
+
casino.jsonl
|
| 15 |
+
extractor_supervision.jsonl — merged supervision set for NLP extractor fine-tune
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import json
|
| 19 |
+
import pathlib
|
| 20 |
+
from datasets import load_dataset
|
| 21 |
+
|
| 22 |
+
OUT = pathlib.Path(__file__).parent / "data"
|
| 23 |
+
OUT.mkdir(exist_ok=True)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# ββ Chicago HAI: Category β verbal tell mapping βββββββββββββββββββ
|
| 27 |
+
# Derived from ACL 2023 paper taxonomy
|
| 28 |
+
CHICAGO_CATEGORY_MAP = {
|
| 29 |
+
"offer": {"verbal_urgency": 0.2, "verbal_confidence": 0.7},
|
| 30 |
+
"counter-offer": {"verbal_urgency": 0.3, "verbal_confidence": 0.6},
|
| 31 |
+
"accept": {"verbal_urgency": 0.1, "verbal_confidence": 0.8},
|
| 32 |
+
"reject": {"verbal_urgency": 0.4, "verbal_confidence": 0.5},
|
| 33 |
+
"information": {"verbal_urgency": 0.1, "verbal_confidence": 0.6},
|
| 34 |
+
"threat": {"verbal_urgency": 0.7, "verbal_confidence": 0.8},
|
| 35 |
+
"appeal": {"verbal_urgency": 0.5, "verbal_confidence": 0.4},
|
| 36 |
+
"other": {"verbal_urgency": 0.2, "verbal_confidence": 0.5},
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
FIRM_SOFT_MAP = {
|
| 40 |
+
"Firm": 0.85,
|
| 41 |
+
"Soft": 0.25,
|
| 42 |
+
"": 0.5,
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
# Non-empty External Incentive = social proof / bluff signal
|
| 46 |
+
EXTERNAL_INCENTIVE_DECEPTION = 0.65
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# ββ CaSiNo: strategy β tell mapping ββββββββββββββββββββββββββββββ
|
| 50 |
+
# CaSiNo annotates with: no-need, self-need, other-need, vouch-fair,
|
| 51 |
+
# showing-concern, no-deal, coordination, empathy, small-talk
|
| 52 |
+
CASINO_STRATEGY_MAP = {
|
| 53 |
+
"no-need": {"verbal_urgency": 0.1, "verbal_deception_cue": 0.3},
|
| 54 |
+
"self-need": {"verbal_urgency": 0.6, "verbal_deception_cue": 0.1},
|
| 55 |
+
"other-need": {"verbal_urgency": 0.3, "verbal_deception_cue": 0.4},
|
| 56 |
+
"vouch-fair": {"verbal_urgency": 0.2, "verbal_confidence": 0.7},
|
| 57 |
+
"showing-concern": {"verbal_urgency": 0.3, "verbal_confidence": 0.4},
|
| 58 |
+
"no-deal": {"verbal_urgency": 0.5, "verbal_confidence": 0.8},
|
| 59 |
+
"coordination": {"verbal_urgency": 0.2, "verbal_confidence": 0.6},
|
| 60 |
+
"empathy": {"verbal_urgency": 0.2, "verbal_confidence": 0.5},
|
| 61 |
+
"small-talk": {"verbal_urgency": 0.05, "verbal_confidence": 0.5},
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _default_tell() -> dict:
|
| 66 |
+
return {
|
| 67 |
+
"verbal_urgency": 0.2,
|
| 68 |
+
"verbal_confidence": 0.5,
|
| 69 |
+
"verbal_deception_cue": 0.0,
|
| 70 |
+
"condition_score": 1.0,
|
| 71 |
+
"depreciation_score": 0.0,
|
| 72 |
+
"condition_label": "unknown",
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def _load_json_maybe_gzipped(path):
    """Parse JSON from ``path``, decompressing first when the file is gzipped."""
    import gzip

    payload = path.read_bytes()
    # 0x1f 0x8b is the gzip magic number; anything else is read as plain JSON.
    if payload[:2] == b"\x1f\x8b":
        payload = gzip.decompress(payload)
    return json.loads(payload)


def fetch_craigslist():
    """Build per-utterance supervision rows from local CraigslistBargain dumps.

    Reads ``data/train.json`` and ``data/dev.json`` (relative to the current
    working directory; gzipped or plain JSON), keeps every ``message`` event
    whose text is at least 5 characters long, and writes the rows to
    ``craigslist_bargains.jsonl`` under ``OUT``.

    Source: https://worksheets.codalab.org/worksheets/0x453913e76b65495d8b9730d41c7e0a0c
    Schema: events list with action in {message, offer, accept, reject, quit}.
    There are no per-turn intent labels, so tells are derived from action type.

    Returns:
        The list of row dicts that was written (empty if no split was found).
    """
    try:
        from nlp.extractor import _condition_from_text
    except ImportError:
        # Condition extraction is optional; rows then keep the default
        # condition fields.
        _condition_from_text = None

    ACTION_TELL_MAP = {
        "message": {"verbal_urgency": 0.2, "verbal_confidence": 0.5},
        "offer": {"verbal_urgency": 0.35, "verbal_confidence": 0.7},
        "accept": {"verbal_urgency": 0.1, "verbal_confidence": 0.8},
        "reject": {"verbal_urgency": 0.45, "verbal_confidence": 0.55},
        "quit": {"verbal_urgency": 0.6, "verbal_confidence": 0.6},
    }

    rows = []
    for split in ("train", "dev"):
        src = pathlib.Path(f"data/{split}.json")
        if not src.exists():
            print(f" ! data/{split}.json not found, skipping")
            continue
        print(f" Loading data/{split}.json ...")
        examples = _load_json_maybe_gzipped(src)

        for ex in examples:
            kbs = ex.get("scenario", {}).get("kbs", [{}, {}])

            # Map each agent index to the role declared in that agent's own
            # knowledge base, rather than assuming agent 0 is the buyer.
            agent_roles = {}
            for idx, kb in enumerate(kbs):
                role = kb.get("personal", {}).get("Role", "")
                if role in ("buyer", "seller"):
                    agent_roles[idx] = role

            outcome = ex.get("outcome", {})
            deal_price = (outcome.get("offer") or {}).get("price")

            # The listing description is the same for every event of the
            # example, so derive its condition signals once.
            condition = {}
            if _condition_from_text is not None:
                item = (kbs[0] if kbs else {}).get("item", {})
                description = item.get("Description", []) or []
                if isinstance(description, str):
                    description = [description]
                desc = " ".join(description)
                if desc:
                    cond_score, dep_score, cond_label = _condition_from_text(desc)
                    if cond_label != "unknown":
                        condition = {
                            "condition_score": cond_score,
                            "depreciation_score": dep_score,
                            "condition_label": cond_label,
                        }

            for ev in ex.get("events", []):
                action = ev.get("action", "")
                text = ev.get("data", "")
                if action != "message" or not isinstance(text, str) or len(text) < 5:
                    continue

                # NOTE(review): only "message" events reach this point, so the
                # verbal fields always stay at 0.2 / 0.5 / 0.0 and
                # merge_supervision() drops every row produced here. Confirm
                # whether offer/accept/reject events were meant to be kept.
                tell = _default_tell()
                tell.update(ACTION_TELL_MAP.get(action, {}))
                tell.update(condition)

                rows.append({
                    "source": "craigslist_bargains",
                    "role": agent_roles.get(ev.get("agent", 0), "unknown"),
                    "utterance": text,
                    "action": action,
                    "deal_price": deal_price,
                    "tell_supervision": tell,
                })

    path = OUT / "craigslist_bargains.jsonl"
    # Rows are dumped with ensure_ascii=False, so the file must be UTF-8
    # regardless of the platform's default encoding.
    with open(path, "w", encoding="utf-8") as f:
        for r in rows:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")
    print(f" -> {len(rows)} turns written to {path}")
    return rows
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def fetch_chicago_hai():
    """Build per-turn supervision rows from the local Chicago HAI release.

    Reads every ``*.json`` file in ``negotiations_public_release/nl``
    (relative to the current working directory), rebuilds each turn's
    utterance from its word records, maps the turn's annotations to tell
    values, and writes the rows to ``chicago_hai_bargaining.jsonl`` under
    ``OUT``.

    Label taxonomy (from data exploration):
        Category: p=price, n=new-offer, c=counter, r=reject, a=accept, e=exit
        Firm or Soft: f=firm, s=soft
        External Incentive: y=yes (social proof / outside pressure claim)

    Returns:
        The list of row dicts that was written; ``[]`` when the directory is
        missing.
    """
    nl_dir = pathlib.Path("negotiations_public_release/nl")
    if not nl_dir.exists():
        print(" ! negotiations_public_release/nl not found, skipping Chicago HAI")
        return []

    files = sorted(nl_dir.glob("*.json"))
    print(f"Loading Chicago HAI from {nl_dir} ({len(files)} files) ...")

    CATEGORY_MAP = {
        "p": {"verbal_urgency": 0.3, "verbal_confidence": 0.7},   # price proposal
        "n": {"verbal_urgency": 0.4, "verbal_confidence": 0.65},  # new offer
        "c": {"verbal_urgency": 0.35, "verbal_confidence": 0.6},  # counter
        "r": {"verbal_urgency": 0.5, "verbal_confidence": 0.5},   # reject
        "a": {"verbal_urgency": 0.1, "verbal_confidence": 0.8},   # accept
        "e": {"verbal_urgency": 0.6, "verbal_confidence": 0.7},   # exit/walk
    }
    FIRM_MAP = {"f": 0.80, "s": 0.25}

    rows = []
    skipped = 0
    for fpath in files:
        try:
            # The files contain bare NaN for missing annotations. Mapping the
            # non-standard constants to None keeps those fields falsy below,
            # whatever whitespace surrounds them in the file.
            d = json.loads(
                fpath.read_text(encoding="utf-8"),
                parse_constant=lambda _name: None,
            )
        except (OSError, ValueError) as exc:
            skipped += 1
            print(f" ! skipping {fpath.name}: {exc}")
            continue
        if not isinstance(d, dict):
            skipped += 1
            print(f" ! skipping {fpath.name}: top-level JSON is not an object")
            continue

        for turn_words in d.get("turns", []):
            if not isinstance(turn_words, list):
                continue
            words = [w for w in turn_words if isinstance(w, dict)]
            if not words:
                continue

            # Reconstruct utterance by joining Word fields
            utterance = " ".join(str(w["Word"]) for w in words if w.get("Word")).strip()
            if len(utterance) < 5:
                continue

            role = words[0].get("Role", "")

            # Take labels from last word that has them (annotation is span-level)
            category, firm_soft, ext_incentive = "", "", ""
            for w in reversed(words):
                if not category and w.get("Category"):
                    category = str(w["Category"]).strip()
                if not firm_soft and w.get("Firm or Soft"):
                    firm_soft = str(w["Firm or Soft"]).strip()
                if not ext_incentive and w.get("External Incentive"):
                    ext_incentive = str(w["External Incentive"]).strip()

            tell = _default_tell()
            tell.update(CATEGORY_MAP.get(category, {}))
            if firm_soft in FIRM_MAP:
                tell["verbal_confidence"] = FIRM_MAP[firm_soft]
            if ext_incentive == "y":
                tell["verbal_deception_cue"] = EXTERNAL_INCENTIVE_DECEPTION

            rows.append({
                "source": "chicago_hai",
                "role": role,
                "utterance": utterance,
                "category": category,
                "firm_soft": firm_soft,
                "external_incentive": ext_incentive,
                "tell_supervision": tell,
            })

    if skipped:
        print(f" ! {skipped} file(s) could not be parsed")

    path = OUT / "chicago_hai_bargaining.jsonl"
    # Rows are dumped with ensure_ascii=False, so force UTF-8 output.
    with open(path, "w", encoding="utf-8") as f:
        for r in rows:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")
    print(f" -> {len(rows)} turns written to {path}")
    return rows
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
def fetch_casino():
    """Download CaSiNo and write one supervision row per chat turn.

    Every non-empty turn in every split becomes a row in ``casino.jsonl``
    under ``OUT``.

    Returns:
        The list of row dicts that was written; ``[]`` if the dataset could
        not be loaded.
    """
    print("Fetching casino (CaSiNo) ...")
    try:
        # NOTE(review): trust_remote_code=True presumably lets the dataset's
        # loading script run locally - confirm this is acceptable here.
        ds = load_dataset("casino", trust_remote_code=True)
    except Exception as e:
        print(f" ! Could not load: {e}")
        return []

    rows = []
    for split in ds.keys():
        for ex in ds[split]:
            for turn in ex.get("chat_logs") or []:
                utt = turn.get("text", "")
                if not utt:
                    continue

                role = turn.get("id", "")
                # CaSiNo per-turn strategy is in annotations, not task_data:
                # task_data contains item allocation info, not strategy labels.
                # Strategy labels are in ex["annotations"] keyed by worker.
                # NOTE(review): the label lookup is not implemented, so
                # strategy_label is always "" and every row keeps the default
                # tells; merge_supervision() then filters all of them out.
                strategy_label = ""

                tell = _default_tell()
                tell.update(CASINO_STRATEGY_MAP.get(strategy_label, {}))

                rows.append({
                    "source": "casino",
                    "role": role,
                    "utterance": utt,
                    "strategy": strategy_label,
                    "tell_supervision": tell,
                })

    path = OUT / "casino.jsonl"
    # Rows are dumped with ensure_ascii=False, so force UTF-8 output.
    with open(path, "w", encoding="utf-8") as f:
        for r in rows:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")
    print(f" -> {len(rows)} turns written to {path}")
    return rows
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
def merge_supervision(craigslist, chicago, casino):
    """Merge all sources into a single supervision set for extractor training.

    Each output row is
    ``{"utterance": str, "source": str, "tell_supervision": dict}``.
    A row is kept only when its stripped utterance has at least 10 characters
    and at least one of verbal_urgency / verbal_confidence /
    verbal_deception_cue differs from its default (0.2 / 0.5 / 0.0).

    Args:
        craigslist: rows produced by fetch_craigslist()
        chicago: rows produced by fetch_chicago_hai()
        casino: rows produced by fetch_casino()

    Returns:
        The list of merged rows, which is also written to
        ``extractor_supervision.jsonl`` under ``OUT``.
    """
    merged = []
    for r in [*craigslist, *chicago, *casino]:
        utt = (r.get("utterance") or "").strip()
        tell = r.get("tell_supervision", {})
        if len(utt) < 10:
            continue
        # Keep only rows where at least one tell deviates from defaults
        non_default = (
            tell.get("verbal_urgency", 0.2) != 0.2
            or tell.get("verbal_confidence", 0.5) != 0.5
            or tell.get("verbal_deception_cue", 0.0) != 0.0
        )
        if not non_default:
            continue
        merged.append({
            "utterance": utt,
            "source": r.get("source", "unknown"),
            "tell_supervision": tell,
        })

    path = OUT / "extractor_supervision.jsonl"
    # Rows are dumped with ensure_ascii=False, so force UTF-8 output.
    with open(path, "w", encoding="utf-8") as f:
        for r in merged:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")
    print(f"\nMerged supervision set: {len(merged)} rows -> {path}")
    return merged
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
if __name__ == "__main__":
    # Arguments are evaluated left to right, so the sources are still fetched
    # in the order craigslist, chicago, casino before merging.
    merge_supervision(fetch_craigslist(), fetch_chicago_hai(), fetch_casino())
    print("\nDone. Run nlp/extractor.py to test extraction against these.")
|
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Keyword/phrase patterns for inline span-level tell highlighting.
|
| 2 |
+
|
| 3 |
+
Used by the /highlight endpoint to show users which exact phrases in their
|
| 4 |
+
message triggered which tell signal — Grammarly-style underlining in the
|
| 5 |
+
chat bubble.
|
| 6 |
+
|
| 7 |
+
Patterns mined from data/indian_negotiations.jsonl seller turns by strategy.
|
| 8 |
+
Hand-curated and grouped by tell signal:
|
| 9 |
+
|
| 10 |
+
urgency: "kal se", "abhi", "jaldi", "today only", "final price"
|
| 11 |
+
deception: "teen aur log dekh rahe", "other buyers", "kabhi nahi"
|
| 12 |
+
confidence: "market rate", "best price", "fixed price"
|
| 13 |
+
condition: "box pack", "scratch", "battery 81%", "abhi naya"
|
| 14 |
+
|
| 15 |
+
Each pattern has:
|
| 16 |
+
- regex (case-insensitive, word-bounded where useful)
|
| 17 |
+
- signal it triggers (urgency / deception / confidence / condition)
|
| 18 |
+
- score it adds to that signal (0-1)
|
| 19 |
+
- one-line explanation shown in the hover card
|
| 20 |
+
|
| 21 |
+
The frontend uses these to wrap matched spans in <mark> tags.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
from __future__ import annotations
|
| 25 |
+
|
| 26 |
+
import re
|
| 27 |
+
from typing import Literal, NamedTuple
|
| 28 |
+
|
| 29 |
+
Signal = Literal["urgency", "deception", "confidence", "condition"]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class Pattern(NamedTuple):
    """One highlight rule: a compiled regex plus the tell signal it triggers."""

    # Compiled regular expression searched for in the message.
    pattern: re.Pattern[str]
    # Tell signal this rule contributes to.
    signal: Signal
    # Score attached to a match of this rule.
    score: float
    # Human-readable label for the hover card.
    explanation: str
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _p(regex: str, signal: Signal, score: float, explanation: str) -> Pattern:
    """Compile ``regex`` case-insensitively and bundle it into a Pattern."""
    compiled = re.compile(regex, flags=re.IGNORECASE)
    return Pattern(
        pattern=compiled,
        signal=signal,
        score=score,
        explanation=explanation,
    )
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# Ordered table of highlight rules, grouped by the tell signal they feed.
# Explanations are the strings shown to the user in the hover card.
PATTERNS: list[Pattern] = [
    # -- URGENCY -------------------------------------------------------
    _p(r"\bkal\s+se\b", "urgency", 0.6, "Time pressure: 'price changes tomorrow'"),
    _p(r"\babhi\b(?!\s+nahi)", "urgency", 0.4, "Hindi 'right now' \u2014 pushes immediate decision"),
    _p(r"\bjaldi\b", "urgency", 0.6, "Hindi 'quickly' \u2014 explicit urgency"),
    _p(r"\btoday\s+only\b", "urgency", 0.7, "Time pressure: limited window"),
    _p(r"\bfinal\s+price\b", "urgency", 0.5, "Anchoring: 'this is final, no negotiation'"),
    _p(r"\blast\s+price\b", "urgency", 0.5, "Anchoring: claims this is the bottom"),
    _p(r"\bfix(?:ed)?\s+(?:hai|price)\b", "urgency", 0.4, "Position commitment: 'price is fixed'"),
    _p(r"\bno\s+(?:more\s+)?negotiation\b", "urgency", 0.7, "Closes the door on further bargaining"),
    _p(r"\btime\s+waste\b", "urgency", 0.5, "Impatience signal"),
    _p(r"\bimmediately\b", "urgency", 0.4, "Demands same-instant action"),
    _p(r"\bsend\s+(?:the\s+)?money\b", "urgency", 0.5, "Pushing toward immediate transaction"),
    _p(r"\b(?:i'?m|im|i am)\s+(?:making\s+a\s+)?los(?:s|ing)\b", "urgency", 0.55,
       "Loss-claim sympathy push \u2014 pressures buyer to feel bad about price"),
    _p(r"\bnot?\s+making\s+(?:any\s+)?(?:profit|money)\b", "urgency", 0.5, "No-profit sympathy push"),
    _p(r"\bbarely\s+breaking\s+even\b", "urgency", 0.5, "Sympathy push: claims zero margin"),

    # -- DECEPTION -----------------------------------------------------
    # The classic: "teen aur log dekh rahe" (three other people are looking)
    _p(r"\bteen\s+aur\s+log\b", "deception", 0.8,
       "External-incentive bluff: claims multiple competing buyers (CaSiNo deception cue)"),
    _p(r"\bother\s+(?:people|buyers?)\s+(?:are\s+)?looking\b", "deception", 0.8,
       "External-incentive bluff: claims competing buyers"),
    _p(r"\bothers\s+are\s+looking\b", "deception", 0.8, "External-incentive bluff"),
    _p(r"\bkoi\s+aur\s+(?:buyer|log)\b", "deception", 0.7, "Claims another buyer is interested"),
    _p(r"\baur\s+log\s+(?:bhi\s+)?dekh\b", "deception", 0.7, "Claims more people watching"),
    _p(r"\bdemand\s+(?:zyada|high)\b", "deception", 0.4, "Claims market demand to justify price"),
    _p(r"\bmarket\s+(?:mein\s+)?(?:bahut\s+)?demand\b", "deception", 0.4, "Claims market demand"),
    _p(r"\bbest\s+price\b", "deception", 0.3, "Self-praise \u2014 soft anchoring"),
    # Numeric "3 other offers" / "two more buyers": same external-incentive bluff
    # as "teen aur log" but in English with digits or number-words.
    _p(r"\b(?:\d+|two|three|four|five|several|multiple|many)\s+(?:other\s+|more\s+)?(?:offers?|buyers?|people|interested)\b",
       "deception", 0.75, "External-incentive bluff: claims competing offers/buyers"),
    _p(r"\bgot\s+(?:\d+|two|three|four|five|several|multiple|other)\s+(?:offers?|buyers?)\b",
       "deception", 0.75, "Claims existing competing offers"),
    _p(r"\b(?:already\s+)?have\s+(?:\d+|two|three|four|other)\s+(?:offers?|buyers?)\b",
       "deception", 0.75, "Claims existing competing offers"),

    # -- CONFIDENCE ----------------------------------------------------
    _p(r"\bmarket\s+rate\b", "confidence", 0.6, "Confidence: anchoring to external price reference"),
    _p(r"\bmarket\s+mein\s+iski\b", "confidence", 0.5, "Confidence: market positioning"),
    _p(r"\bnahi\s+ho(?:\s+payega)?\b", "confidence", 0.6, "Firm refusal: 'won't happen'"),
    _p(r"\bmushkil\s+hai\b", "confidence", 0.4, "Mild firmness: 'difficult'"),
    _p(r"\bisse\s+(?:upar|kam)\s+nahi\b", "confidence", 0.7, "Hard floor/ceiling commitment"),
    _p(r"\bnot?\s+(?:lower|higher)\b", "confidence", 0.6, "Position commitment"),

    # -- CONDITION -----------------------------------------------------
    _p(r"\bbox\s+(?:band|pack|sealed?)\b", "condition", 0.95, "Item is sealed / new in box"),
    _p(r"\bseal\s+packed?\b", "condition", 0.95, "New, factory-sealed"),
    _p(r"\babhi\s+box\s+se\s+nikala\b", "condition", 0.9, "Just unboxed \u2014 like new"),
    _p(r"\b(?:bilkul\s+)?naya\b", "condition", 0.85, "Hindi 'brand new'"),
    _p(r"\b(?:like\s+new|mint)\b", "condition", 0.85, "Like-new condition"),
    _p(r"\bbarely\s+used\b", "condition", 0.8, "Lightly used"),
    # Optional "sa" so the common phrasing "ek chhota sa scratch" is covered.
    _p(r"\bek\s+(?:chhota\s+(?:sa\s+)?)?scratch\b", "condition", 0.55, "Minor scratch \u2014 visible wear"),
    _p(r"\b(?:minor\s+)?scratch(?:es)?\b", "condition", 0.55, "Minor cosmetic damage"),
    _p(r"\bdent\b", "condition", 0.5, "Dent \u2014 moderate wear"),
    _p(r"\bchip(?:ped)?\b", "condition", 0.5, "Chipped \u2014 visible damage"),
    _p(r"\bscreen\s+(?:replaced|change)\b", "condition", 0.35,
       "Screen replacement \u2014 depreciation indicator"),
    # "%" is a non-word character, so a trailing \b after "%?" could never keep
    # the percent sign in the span; accept either "<digits>%" or a word boundary.
    _p(r"\bbattery\s+(?:health\s+)?(\d{2,3})(?:\s*%|\b)", "condition", 0.4,
       "Battery health disclosure \u2014 wear indicator"),
    _p(r"\b(\d{1,2})\s*(?:saal|year)s?\s+(?:purana|old)\b", "condition", 0.5,
       "Age disclosure"),
    _p(r"\bkabhi\s+giraya\s+nahi\b", "condition", 0.85, "Never dropped \u2014 careful owner"),
    _p(r"\boriginal\s+(?:box|charger|warranty)\b", "condition", 0.75,
       "Has original accessories"),
    _p(r"\bwarranty\b", "condition", 0.7, "Has warranty"),
    _p(r"\bperfect\s+condition\b", "condition", 0.85, "Perfect condition claim"),
    _p(r"\bworking\s+condition\b", "condition", 0.7, "Functional but unspecified wear"),
]
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
class Match(NamedTuple):
    """One regex hit inside a message, as produced by `find_matches`."""

    start: int  # char offset where the hit begins (re match .start())
    end: int  # char offset just past the hit (re match .end())
    text: str  # the matched substring (group 0)
    signal: Signal  # signal category copied from the pattern that fired
    score: float  # score copied from the pattern that fired
    explanation: str  # explanation text copied from the pattern that fired
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def find_matches(message: str) -> list[Match]:
    """Scan `message` with every entry of PATTERNS and collect the hits.

    Results are grouped by pattern (in PATTERNS order) and, within a pattern,
    ordered left to right. Spans from different patterns may overlap; no
    de-duplication is performed here.

    Returns:
        A list of Match records carrying char offsets into `message`.
    """
    return [
        Match(
            start=hit.start(),
            end=hit.end(),
            text=hit.group(0),
            signal=rule.signal,
            score=rule.score,
            explanation=rule.explanation,
        )
        for rule in PATTERNS
        for hit in rule.pattern.finditer(message)
    ]
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def aggregate_signals(matches: list[Match]) -> dict[str, float]:
    """Collapse matches to a single score per signal, keeping the highest.

    Signals that never matched are absent from the result. The running value
    for a signal starts at 0.0, so a reported score is never below that.
    """
    best: dict[str, float] = {}
    for hit in matches:
        current = best.get(hit.signal, 0.0)
        best[hit.signal] = hit.score if hit.score > current else current
    return best
|
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Waits for the Indian negotiation generator to finish, then:
#   1. Pulls ministral-3:3b into Ollama
#   2. Lists the installed Ollama models so the pull can be confirmed in the log
#   3. Runs the extractor test to verify quality
#
# NOTE(review): this script never edits $EXTRACTOR, so making ministral-3:3b
# the extractor's default model must happen elsewhere — confirm.
#
# The project root defaults to the parent of this script's directory (the
# script lives in nlp/); set PROJECT_ROOT in the environment to override.
#
# Run with:  bash nlp/setup_ministral.sh &
# Log:       /tmp/setup_ministral.log

set -euo pipefail
LOG=/tmp/setup_ministral.log
TARGET=500
JSONL=data/indian_negotiations.jsonl
EXTRACTOR=nlp/extractor.py

log() { echo "[$(date '+%H:%M:%S')] $*" | tee -a "$LOG"; }

# Resolve the repo root from the script location instead of a hard-coded,
# machine-specific absolute path, so the relative paths above work anywhere.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="${PROJECT_ROOT:-$(dirname "$SCRIPT_DIR")}"
cd "$PROJECT_ROOT"

log "Watching generator — waiting for $TARGET conversations in $JSONL ..."

while true; do
    if [ -f "$JSONL" ]; then
        # One JSONL line per conversation, so the line count is the progress.
        count=$(wc -l < "$JSONL")
        log "Progress: $count / $TARGET conversations"
        if [ "$count" -ge "$TARGET" ]; then
            log "Generator done."
            break
        fi
    else
        log "Output file not found yet, waiting..."
    fi

    # Also stop waiting if the generator process is gone and file exists
    if [ -f "$JSONL" ] && ! pgrep -f generate_indian_negotiations.py > /dev/null 2>&1; then
        count=$(wc -l < "$JSONL")
        log "Generator process ended with $count conversations. Proceeding."
        break
    fi

    sleep 120
done

log "Pulling ministral-3:3b ..."
ollama pull ministral-3:3b 2>&1 | tee -a "$LOG"

log "Verifying ministral-3:3b is available ..."
ollama list | tee -a "$LOG"

log "Running extractor test with ministral-3:3b ..."
PYTHONPATH=. .venv/bin/python "$EXTRACTOR" 2>&1 | tee -a "$LOG"

log "All done. Check $LOG for extractor quality results."
|
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Buyer-message templates for SFT targets and steerer-override fallback.
|
| 2 |
+
|
| 3 |
+
Templates are bucketed by:
|
| 4 |
+
action_kind ∈ {offer_low, offer_mid, offer_high, accept, walk}
|
| 5 |
+
register ∈ {firm, soft, polite, curt, final} — escalating tone
|
| 6 |
+
|
| 7 |
+
Use the `turn_index` in render() to bias toward `final` register on later turns
|
| 8 |
+
(round-aware escalation: opening turns sound exploratory, late turns sound terminal).
|
| 9 |
+
|
| 10 |
+
The bank avoids "yaar" (informal/casual filler) and overuse of "bhai" — keeps the
|
| 11 |
+
buyer's voice grounded in Hinglish-leaning English without sounding like a street vendor.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import random
|
| 15 |
+
from typing import Optional
|
| 16 |
+
|
| 17 |
+
# Buyer-line bank, keyed by action bucket. Each entry is (register, template);
# every offer_* template carries a {price} slot, accept/walk templates do not.
# The dash characters in four templates were mis-encoded (rendered as a stray
# Greek beta) and are restored to an em dash here.
TEMPLATES: dict[str, list[tuple[str, str]]] = {
    "offer_low": [
        ("firm", "{price} max de sakta hu, isse upar nahi"),
        ("firm", "{price} mera offer, isse zyada nahi"),
        ("firm", "{price} pe baat banegi, warna nahi"),
        ("firm", "{price} hai bas, final"),
        ("soft", "{price} mein de dijiye please"),
        ("soft", "{price} chalega bhai?"),
        ("soft", "{price} kar lo, deal pakki"),
        ("polite", "market mein {price} mein mil jaata hai, dekh lijiye"),
        ("polite", "honestly bhai, {price} fair lagta hai mujhe"),
        ("polite", "{price} reasonable hai, condition dekh ke"),
        ("curt", "{price}. le ya jaa."),
        ("curt", "{price}, last from my side"),
        ("final", "okay, {price} mera final offer hai"),
        ("final", "{price} ya nahi — beyond this I walk"),
    ],
    "offer_mid": [
        ("firm", "{price} pe karte hain deal"),
        ("firm", "{price} works for me, lock kar do"),
        ("firm", "chalo, {price} pe baat khatam"),
        ("soft", "{price} chalega bhai?"),
        ("soft", "{price} mein ho jaye?"),
        ("soft", "thoda kam karo, {price} pe finalize?"),
        ("polite", "{price} fair hai dono ke liye"),
        ("polite", "{price} sahi rate lagta hai mujhe"),
        ("curt", "{price}. that's where I am"),
        ("curt", "{price}, isse upar nahi"),
        ("final", "{price} ya I'm out"),
        ("final", "this is my last move — {price}"),
    ],
    "offer_high": [
        ("firm", "okay, {price} but that's the limit"),
        ("firm", "{price}, isse upar nahi ja sakta"),
        ("soft", "{price} okay? close kar dete hain"),
        ("soft", "fine, {price} mein le leta hu"),
        ("polite", "{price} stretch kar raha hu, condition fair lagi"),
        ("polite", "{price} de raha hu since you've been reasonable"),
        ("curt", "{price}. done?"),
        ("curt", "{price}, last bid"),
        ("final", "okay {price} — bas yahi ceiling hai"),
        ("final", "{price} pe close ya I walk"),
    ],
    "accept": [
        ("firm", "deal."),
        ("firm", "done."),
        ("firm", "chalo, deal."),
        ("soft", "okay, le leta hu"),
        ("soft", "theek hai, kar lete hain"),
        ("polite", "fair, accepted"),
        ("polite", "sounds good, deal pakki"),
        ("curt", "haan."),
        ("curt", "ho gaya, done"),
        ("final", "deal, close kar dete hain"),
        ("final", "okay, isi pe lock"),
    ],
    "walk": [
        # NOTE(review): the module docstring says the bank avoids "yaar", yet
        # this line uses it — left unchanged; confirm which one is intended.
        ("firm", "nahi yaar nahi, ye nahi ho payega"),
        ("firm", "budget mein nahi aa raha, passing"),
        ("soft", "thanks for your time, dekhte hain phir kabhi"),
        ("soft", "appreciate it, but is price pe nahi"),
        ("polite", "gap zyada hai, mujhe pass karna hoga"),
        ("polite", "respect your floor, but mere liye nahi banega"),
        ("curt", "no deal."),
        ("curt", "passing, thanks"),
        ("final", "bahut difference hai — walking"),
        ("final", "is price pe nahi, goodbye"),
    ],
}
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _bucket_for_offer(price: float, ask: float) -> str:
    """Map an offer to a template bucket by its share of the seller's ask.

    Below 55% of the ask is "offer_low", below 80% is "offer_mid", anything
    else is "offer_high". A non-positive ask cannot be used as a divisor, so
    it falls back to "offer_mid".
    """
    if ask <= 0:
        return "offer_mid"
    share = price / ask
    # Upper bounds are exclusive and checked in ascending order.
    for ceiling, label in ((0.55, "offer_low"), (0.80, "offer_mid")):
        if share < ceiling:
            return label
    return "offer_high"
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _register_for_turn(
    turn_index: int,
    max_turns: int = 8,
    rng: Optional[random.Random] = None,
) -> Optional[str]:
    """Pick a tone register biased by how far into the episode we are.

    Progress is `turn_index / max_turns` (with max_turns floored at 1):
      - progress < 0.25         (opening): polite or soft
      - 0.25 <= progress < 0.65 (mid):     firm or soft
      - progress >= 0.65        (late):    curt or final

    With the default max_turns=8 that is turns 0-1, 2-5 and 6+ respectively.

    Args:
        turn_index: current round number; a negative value yields None.
        max_turns: episode length used to normalize turn_index.
        rng: optional Random instance; when given, the pick is drawn from it
            instead of the module-level generator so seeded callers stay
            reproducible.

    Returns:
        A register name, or None when turn_index is negative.
    """
    if turn_index < 0:
        return None
    chooser = rng if rng is not None else random
    progress = turn_index / max(1, max_turns)
    if progress < 0.25:
        return chooser.choice(["polite", "soft"])
    if progress < 0.65:
        return chooser.choice(["firm", "soft"])
    return chooser.choice(["curt", "final"])


def render(
    action: str,
    price: Optional[float],
    ask: Optional[float] = None,
    intent: Optional[str] = None,
    turn_index: Optional[int] = None,
    max_turns: int = 8,
    used_history: Optional[set[str]] = None,
    rng: Optional[random.Random] = None,
) -> str:
    """Pick a template, render it with the given price, avoid recent repeats.

    Args:
        action: 'offer' | 'accept' | 'walk'
        price: numeric price for offers; None for accept/walk
        ask: seller's current ask (used to bucket offer price)
        intent: explicit register override ('firm'|'soft'|'polite'|'curt'|'final')
        turn_index: current round number; biases register toward 'final' as it grows
        max_turns: typical episode length used for normalizing turn_index
        used_history: set of already-rendered messages this episode (avoid repeats)
        rng: optional Random instance for reproducibility; it drives both the
            turn-based register pick and the final template choice

    Returns:
        A natural-language line with the {price} slot filled, or "" when the
        action is unknown or its bucket has no templates.
    """
    chooser = rng if rng is not None else random
    if action == "offer":
        # With no ask, the price is compared against itself (ratio 1.0).
        bucket = _bucket_for_offer(price or 0, ask or (price or 0))
    elif action == "accept":
        bucket = "accept"
    elif action == "walk":
        bucket = "walk"
    else:
        return ""

    candidates = TEMPLATES.get(bucket, [])
    if not candidates:
        return ""

    # Determine register: explicit > turn-based > random
    register = intent or (
        _register_for_turn(turn_index, max_turns, rng=rng)
        if turn_index is not None
        else None
    )

    if register:
        register_pool = [(r, t) for r, t in candidates if r == register]
    else:
        register_pool = list(candidates)
    if not register_pool:
        # Unknown register, or none of that register in this bucket.
        register_pool = list(candidates)

    def _materialize(tmpl: str) -> str:
        if "{price}" in tmpl and price is not None:
            return tmpl.format(price=int(round(price)))
        return tmpl

    # `used_history` stores rendered messages, so compare against the materialized form.
    # Variety > register fidelity when buyer is stuck: widen to all registers
    # before allowing repeats.
    if used_history:
        fresh_in_register = [
            (r, t) for r, t in register_pool if _materialize(t) not in used_history
        ]
        if fresh_in_register:
            pool = fresh_in_register
        else:
            fresh_anywhere = [
                (r, t) for r, t in candidates if _materialize(t) not in used_history
            ]
            pool = fresh_anywhere or register_pool
    else:
        pool = register_pool

    _, tmpl = chooser.choice(pool)
    return _materialize(tmpl)
|
|
@@ -4,10 +4,11 @@ from __future__ import annotations
|
|
| 4 |
|
| 5 |
import copy
|
| 6 |
import json
|
|
|
|
| 7 |
from contextlib import asynccontextmanager
|
| 8 |
from typing import Optional
|
| 9 |
|
| 10 |
-
from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
|
| 11 |
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
from fastapi.responses import HTMLResponse
|
| 13 |
from pydantic import BaseModel
|
|
@@ -91,6 +92,20 @@ class ArenaStepRequest(BaseModel):
|
|
| 91 |
|
| 92 |
# ββ App state βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
_envs: dict[str, BazaarEnvironment] = {}
|
| 95 |
_arenas: dict[str, MultiBuyerArena] = {}
|
| 96 |
_ws_connections: dict[str, list[WebSocket]] = {}
|
|
@@ -337,6 +352,90 @@ async def health():
|
|
| 337 |
return {"status": "ok", "version": "2.0.0"}
|
| 338 |
|
| 339 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
# ββ Simulate (AI auto-play for spectator mode) ββββββββββββββββββ
|
| 341 |
|
| 342 |
class SimulateRequest(BaseModel):
|
|
@@ -356,8 +455,50 @@ class SellerModeStepRequest(BaseModel):
|
|
| 356 |
price: float
|
| 357 |
|
| 358 |
|
| 359 |
-
def _ai_buyer_action(
|
| 360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
budget = obs.own_private_budget
|
| 362 |
ask = obs.seller_asking_price
|
| 363 |
opp = obs.opponent_last_offer or ask
|
|
@@ -545,15 +686,33 @@ class SellerModeResetRequest(BaseModel):
|
|
| 545 |
strategy: str = "smart"
|
| 546 |
seed: Optional[int] = None
|
| 547 |
opening_price: float = 60.0
|
|
|
|
|
|
|
| 548 |
|
| 549 |
|
| 550 |
@app.post("/seller-mode/reset")
|
| 551 |
-
async def seller_mode_reset(req: SellerModeResetRequest):
|
| 552 |
"""Start a seller-mode session. User plays as seller, AI plays as buyer."""
|
| 553 |
if req.task not in TASKS:
|
| 554 |
raise HTTPException(status_code=400, detail=f"Unknown task: {req.task}")
|
| 555 |
|
| 556 |
task = copy.deepcopy(TASKS[req.task])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 557 |
# Store seller mode state
|
| 558 |
import random
|
| 559 |
session = {
|
|
@@ -562,8 +721,8 @@ async def seller_mode_reset(req: SellerModeResetRequest):
|
|
| 562 |
"rng": random.Random(req.seed),
|
| 563 |
"round": 0,
|
| 564 |
"max_rounds": task.max_steps if task.total_episodes == 1 else task.max_steps // task.total_episodes,
|
| 565 |
-
"buyer_budget":
|
| 566 |
-
"seller_cost":
|
| 567 |
"current_seller_price": req.opening_price,
|
| 568 |
"last_buyer_offer": None,
|
| 569 |
"history": [],
|
|
@@ -577,17 +736,21 @@ async def seller_mode_reset(req: SellerModeResetRequest):
|
|
| 577 |
current_round=0,
|
| 578 |
max_rounds=session["max_rounds"],
|
| 579 |
opponent_last_offer=req.opening_price,
|
| 580 |
-
own_private_budget=
|
| 581 |
rounds_remaining=session["max_rounds"],
|
| 582 |
seller_asking_price=req.opening_price,
|
| 583 |
-
item_name="handwoven silk scarf",
|
| 584 |
message=f"You open at {req.opening_price:.0f} rupees.",
|
| 585 |
)
|
| 586 |
|
| 587 |
# AI buyer makes first offer
|
| 588 |
-
|
|
|
|
| 589 |
session["round"] = 1
|
| 590 |
session["last_buyer_offer"] = action.price
|
|
|
|
|
|
|
|
|
|
| 591 |
session["history"].append({
|
| 592 |
"round": 0,
|
| 593 |
"actor": "seller",
|
|
@@ -599,9 +762,10 @@ async def seller_mode_reset(req: SellerModeResetRequest):
|
|
| 599 |
"actor": "buyer",
|
| 600 |
"action": action.action.value if hasattr(action.action, 'value') else action.action,
|
| 601 |
"price": action.price,
|
|
|
|
| 602 |
})
|
| 603 |
|
| 604 |
-
|
| 605 |
f"Buyer offers {action.price:.0f} rupees."
|
| 606 |
if action.action in ("offer", "OFFER", ActionType.OFFER)
|
| 607 |
else f"Buyer {action.action}s."
|
|
@@ -611,7 +775,8 @@ async def seller_mode_reset(req: SellerModeResetRequest):
|
|
| 611 |
"round": 1,
|
| 612 |
"buyer_action": action.action.value if hasattr(action.action, 'value') else action.action,
|
| 613 |
"buyer_price": action.price,
|
| 614 |
-
"message":
|
|
|
|
| 615 |
"your_opening": req.opening_price,
|
| 616 |
"history": session["history"],
|
| 617 |
"done": False,
|
|
@@ -619,7 +784,7 @@ async def seller_mode_reset(req: SellerModeResetRequest):
|
|
| 619 |
|
| 620 |
|
| 621 |
@app.post("/seller-mode/step")
|
| 622 |
-
async def seller_mode_step(req: SellerModeStepRequest):
|
| 623 |
"""User (as seller) sets counteroffer price. AI buyer responds."""
|
| 624 |
if "seller_mode" not in _envs:
|
| 625 |
raise HTTPException(status_code=400, detail="No seller-mode session. Call /seller-mode/reset first.")
|
|
@@ -690,7 +855,8 @@ async def seller_mode_step(req: SellerModeStepRequest):
|
|
| 690 |
}
|
| 691 |
|
| 692 |
# AI buyer responds
|
| 693 |
-
|
|
|
|
| 694 |
|
| 695 |
if action.action in ("accept", ActionType.ACCEPT):
|
| 696 |
session["done"] = True
|
|
@@ -700,16 +866,20 @@ async def seller_mode_step(req: SellerModeStepRequest):
|
|
| 700 |
max_surplus = session["buyer_budget"] - session["seller_cost"]
|
| 701 |
buyer_score = max(0, surplus / max_surplus) if max_surplus > 0 else 0
|
| 702 |
|
|
|
|
|
|
|
| 703 |
session["history"].append({
|
| 704 |
"round": rnd,
|
| 705 |
"actor": "buyer",
|
| 706 |
"action": "accept",
|
| 707 |
"price": seller_price,
|
|
|
|
| 708 |
})
|
| 709 |
|
| 710 |
return {
|
| 711 |
"round": rnd,
|
| 712 |
-
"message": f"Buyer accepts your price of {seller_price:.0f}! Deal closed.",
|
|
|
|
| 713 |
"buyer_action": "accept",
|
| 714 |
"buyer_price": seller_price,
|
| 715 |
"done": True,
|
|
@@ -723,17 +893,21 @@ async def seller_mode_step(req: SellerModeStepRequest):
|
|
| 723 |
elif action.action in ("walk", ActionType.WALK):
|
| 724 |
session["done"] = True
|
| 725 |
session["outcome"] = "walk"
|
|
|
|
|
|
|
| 726 |
|
| 727 |
session["history"].append({
|
| 728 |
"round": rnd,
|
| 729 |
"actor": "buyer",
|
| 730 |
"action": "walk",
|
| 731 |
"price": None,
|
|
|
|
| 732 |
})
|
| 733 |
|
| 734 |
return {
|
| 735 |
"round": rnd,
|
| 736 |
-
"message": "Buyer walks away! No deal.",
|
|
|
|
| 737 |
"buyer_action": "walk",
|
| 738 |
"buyer_price": None,
|
| 739 |
"done": True,
|
|
@@ -743,16 +917,21 @@ async def seller_mode_step(req: SellerModeStepRequest):
|
|
| 743 |
|
| 744 |
else: # offer
|
| 745 |
session["last_buyer_offer"] = action.price
|
|
|
|
|
|
|
|
|
|
| 746 |
session["history"].append({
|
| 747 |
"round": rnd,
|
| 748 |
"actor": "buyer",
|
| 749 |
"action": "offer",
|
| 750 |
"price": action.price,
|
|
|
|
| 751 |
})
|
| 752 |
|
| 753 |
return {
|
| 754 |
"round": rnd,
|
| 755 |
-
"message": f"Buyer counters with {action.price:.0f} rupees.",
|
|
|
|
| 756 |
"buyer_action": "offer",
|
| 757 |
"buyer_price": action.price,
|
| 758 |
"done": False,
|
|
|
|
| 4 |
|
| 5 |
import copy
|
| 6 |
import json
|
| 7 |
+
import os
|
| 8 |
from contextlib import asynccontextmanager
|
| 9 |
from typing import Optional
|
| 10 |
|
| 11 |
+
from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
|
| 12 |
from fastapi.middleware.cors import CORSMiddleware
|
| 13 |
from fastapi.responses import HTMLResponse
|
| 14 |
from pydantic import BaseModel
|
|
|
|
| 92 |
|
| 93 |
# ── App state ─────────────────────────────────────────────────────
|
| 94 |
|
| 95 |
+
def _client_ip(request: Request) -> Optional[str]:
    """Best-effort client IP for rate-limiting.

    Honors X-Forwarded-For when deployed behind a proxy/CDN by taking its
    leftmost entry. If the header is absent or that entry is blank, falls back
    to the direct socket peer, so a malformed header cannot turn an external
    caller into an IP-less one.

    Note: in untrusted environments XFF can be spoofed. Hosting plan today
    is direct uvicorn or behind a single-hop reverse proxy we control, so
    trusting the leftmost XFF entry is acceptable.

    Returns:
        The IP string, or None when neither source yields one.
    """
    xff = request.headers.get("x-forwarded-for")
    if xff:
        leftmost = xff.split(",")[0].strip()
        if leftmost:
            return leftmost
    return request.client.host if request.client else None
|
| 107 |
+
|
| 108 |
+
|
| 109 |
_envs: dict[str, BazaarEnvironment] = {}
|
| 110 |
_arenas: dict[str, MultiBuyerArena] = {}
|
| 111 |
_ws_connections: dict[str, list[WebSocket]] = {}
|
|
|
|
| 352 |
return {"status": "ok", "version": "2.0.0"}
|
| 353 |
|
| 354 |
|
| 355 |
+
# ── Highlight: span-level tell extraction for the /sell page ────
|
| 356 |
+
|
| 357 |
+
class HighlightRequest(BaseModel):
    """Request body for POST /highlight."""

    message: str  # text to scan; passed unchanged to find_matches()
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
class HighlightSpan(BaseModel):
    """One highlighted span in the /highlight response.

    Fields are copied one-to-one from a match returned by find_matches().
    """

    start: int  # char offset where the span begins
    end: int  # char offset where the span ends
    text: str  # the matched text
    signal: str  # signal name of the pattern that fired
    score: float  # score attached to that pattern
    explanation: str  # explanation attached to that pattern
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
class HighlightResponse(BaseModel):
    """Response body for POST /highlight."""

    spans: list[HighlightSpan]  # one entry per match, in find_matches() order
    aggregate: dict[str, float]  # result of aggregate_signals() over the same matches
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
@app.post("/highlight", response_model=HighlightResponse)
async def highlight(req: HighlightRequest):
    """Locate tell-triggering phrases in a seller message.

    The /sell page calls this after the user sends a chat bubble so it can
    underline urgency/deception/condition phrases. Matching is pattern-based
    and deterministic; no LLM is involved.

    Returns:
        HighlightResponse with one span per match plus the per-signal rollup.
    """
    from nlp.keyword_patterns import find_matches, aggregate_signals

    hits = find_matches(req.message)

    spans = []
    for hit in hits:
        spans.append(
            HighlightSpan(
                start=hit.start,
                end=hit.end,
                text=hit.text,
                signal=hit.signal,
                score=hit.score,
                explanation=hit.explanation,
            )
        )

    rollup = aggregate_signals(hits)
    return HighlightResponse(spans=spans, aggregate=rollup)
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
@app.get("/sauda/health")
async def sauda_health(request: Request):
    """Probe both backends. Used to choose strategy and surface config errors.

    Public response is intentionally minimal: just a green/red signal.
    For the full ops view (spend, rate-limit hits, circuit-breaker state),
    pass the X-Sauda-Admin header matching SAUDA_ADMIN_TOKEN env-var.

    Returns:
        The full health dict for an authenticated admin; otherwise a dict with
        only "status" ("ok"/"degraded") and "live_agent_available".
    """
    import hmac

    from .sauda_buyer import health as _full_health

    full = _full_health()
    admin_token = os.environ.get("SAUDA_ADMIN_TOKEN", "").strip()
    supplied = request.headers.get("x-sauda-admin", "")
    # An unset/blank token disables the admin view entirely. The comparison is
    # constant-time so response timing does not leak how much of a guess
    # matched; both sides are encoded because compare_digest rejects
    # non-ASCII str arguments.
    is_admin = bool(admin_token) and hmac.compare_digest(
        supplied.encode("utf-8"), admin_token.encode("utf-8")
    )
    if is_admin:
        return full
    # Public view: only the bits a UI needs to decide whether the live agent
    # is reachable. No spend numbers, no IP counts, no circuit breaker state.
    live = bool(full.get("hf_ok") or full.get("ollama_ok"))
    return {
        "status": "ok" if live else "degraded",
        "live_agent_available": live,
    }
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
@app.get("/sauda/backends")
async def sauda_backends():
    """List the selectable buyer backends for the /sell UI dropdown.

    The catalogue is static: it describes the options and does not probe
    whether any of them is currently reachable.
    """
    # (id, label, primary, description); order is the order returned.
    catalogue = (
        ("sauda", "Sauda v2 (HF Endpoint)", True,
         "Llama-3.1-8B + SFT+GRPO LoRA, served via HF Inference Endpoint."),
        ("sauda_ollama", "Sauda v2 (Ollama, local)", False,
         "Same adapter, served locally via Ollama. Fallback when HF endpoint is unavailable."),
        ("smart", "Rule-based (smart)", False,
         "Heuristic baseline. No LLM. Always available."),
        ("naive", "Rule-based (naive)", False,
         "Easy buyer for seller-mode warmup."),
        ("aggressive", "Rule-based (aggressive)", False,
         "Hard rule-based buyer."),
    )
    entries = []
    for ident, label, is_primary, blurb in catalogue:
        entries.append(
            {"id": ident, "label": label, "primary": is_primary, "description": blurb}
        )
    return {"backends": entries}
|
| 437 |
+
|
| 438 |
+
|
| 439 |
# ── Simulate (AI auto-play for spectator mode) ──────────────────
|
| 440 |
|
| 441 |
class SimulateRequest(BaseModel):
|
|
|
|
| 455 |
price: float
|
| 456 |
|
| 457 |
|
| 458 |
+
def _ai_buyer_action(
|
| 459 |
+
obs: BazaarObservation,
|
| 460 |
+
strategy: str,
|
| 461 |
+
rng,
|
| 462 |
+
*,
|
| 463 |
+
client_ip: Optional[str] = None,
|
| 464 |
+
) -> BazaarAction:
|
| 465 |
+
"""Built-in AI buyer strategies for spectator / seller mode.
|
| 466 |
+
|
| 467 |
+
`strategy` values:
|
| 468 |
+
- "sauda" / "sauda_hf" β HF Inference Endpoint serving Sauda v2
|
| 469 |
+
- "sauda_ollama" β local ollama serving Sauda v2
|
| 470 |
+
- "smart" / "naive" / "aggressive" β rule-based heuristics (no LLM)
|
| 471 |
+
|
| 472 |
+
`client_ip` is forwarded to the safety layer for per-IP rate-limiting on
|
| 473 |
+
the metered HF backend; pass None for trusted server-internal callers.
|
| 474 |
+
"""
|
| 475 |
+
# Live Sauda v2 path (HF endpoint primary, Ollama fallback selectable)
|
| 476 |
+
if strategy in ("sauda", "sauda_hf", "sauda_ollama"):
|
| 477 |
+
from .sauda_buyer import sauda_action
|
| 478 |
+
backend = "ollama" if strategy == "sauda_ollama" else "hf"
|
| 479 |
+
obs_dict = obs.model_dump() if hasattr(obs, "model_dump") else obs.dict()
|
| 480 |
+
result = sauda_action(obs_dict, backend=backend, client_ip=client_ip)
|
| 481 |
+
action_str = result.get("action", "offer")
|
| 482 |
+
price = result.get("price")
|
| 483 |
+
msg = result.get("message", "")
|
| 484 |
+
if action_str == "accept":
|
| 485 |
+
ba = BazaarAction(action="accept")
|
| 486 |
+
elif action_str == "walk":
|
| 487 |
+
ba = BazaarAction(action="walk")
|
| 488 |
+
else:
|
| 489 |
+
ba = BazaarAction(action="offer", price=float(price) if price is not None else round((obs.own_private_budget or 100) * 0.3, 2))
|
| 490 |
+
# Smuggle the model's prose message + backend trace through a side channel
|
| 491 |
+
# (BazaarAction has no message field; the route handler reads .sauda_message
|
| 492 |
+
# off the action when present).
|
| 493 |
+
try:
|
| 494 |
+
object.__setattr__(ba, "sauda_message", msg)
|
| 495 |
+
object.__setattr__(ba, "sauda_backend", result.get("backend", backend))
|
| 496 |
+
if result.get("error"):
|
| 497 |
+
object.__setattr__(ba, "sauda_error", result["error"])
|
| 498 |
+
except Exception:
|
| 499 |
+
pass
|
| 500 |
+
return ba
|
| 501 |
+
|
| 502 |
budget = obs.own_private_budget
|
| 503 |
ask = obs.seller_asking_price
|
| 504 |
opp = obs.opponent_last_offer or ask
|
|
|
|
| 686 |
strategy: str = "smart"
|
| 687 |
seed: Optional[int] = None
|
| 688 |
opening_price: float = 60.0
|
| 689 |
+
item_name: Optional[str] = None
|
| 690 |
+
listing_price: Optional[float] = None # if user picked a real listing, this is its MRP
|
| 691 |
|
| 692 |
|
| 693 |
@app.post("/seller-mode/reset")
|
| 694 |
+
async def seller_mode_reset(req: SellerModeResetRequest, request: Request):
|
| 695 |
"""Start a seller-mode session. User plays as seller, AI plays as buyer."""
|
| 696 |
if req.task not in TASKS:
|
| 697 |
raise HTTPException(status_code=400, detail=f"Unknown task: {req.task}")
|
| 698 |
|
| 699 |
task = copy.deepcopy(TASKS[req.task])
|
| 700 |
+
|
| 701 |
+
# Tasks have hardcoded buyer_budget / seller_cost from synthetic examples.
|
| 702 |
+
# When the user opens at a real-listing price ($2695 for an iPhone, $399
|
| 703 |
+
# for a sofa, etc) those numbers become nonsense and Sauda offers $30 on
|
| 704 |
+
# a $2695 ask. Anchor the scale on the task's *opening price prior* —
|
| 705 |
+
# buyer_budget = 1.67×ask in single_deal (60 → 100), and the relative
|
| 706 |
+
# ratios (cost / budget ≈ 0.35, ask / budget ≈ 0.6) hold across tasks.
|
| 707 |
+
# Derive sane budget/cost from the user's actual opening_price using those
|
| 708 |
+
# ratios so the buyer's model of the deal scales with the listing.
|
| 709 |
+
if req.opening_price and req.opening_price > 0:
|
| 710 |
+
scaled_budget = float(req.opening_price) * 1.05 # buyer can stretch ~5% above ask
|
| 711 |
+
scaled_cost = float(req.opening_price) * 0.35 # seller's true cost ~35% of ask
|
| 712 |
+
else:
|
| 713 |
+
scaled_budget = task.buyer_budget
|
| 714 |
+
scaled_cost = task.seller_cost
|
| 715 |
+
|
| 716 |
# Store seller mode state
|
| 717 |
import random
|
| 718 |
session = {
|
|
|
|
| 721 |
"rng": random.Random(req.seed),
|
| 722 |
"round": 0,
|
| 723 |
"max_rounds": task.max_steps if task.total_episodes == 1 else task.max_steps // task.total_episodes,
|
| 724 |
+
"buyer_budget": scaled_budget,
|
| 725 |
+
"seller_cost": scaled_cost,
|
| 726 |
"current_seller_price": req.opening_price,
|
| 727 |
"last_buyer_offer": None,
|
| 728 |
"history": [],
|
|
|
|
| 736 |
current_round=0,
|
| 737 |
max_rounds=session["max_rounds"],
|
| 738 |
opponent_last_offer=req.opening_price,
|
| 739 |
+
own_private_budget=scaled_budget,
|
| 740 |
rounds_remaining=session["max_rounds"],
|
| 741 |
seller_asking_price=req.opening_price,
|
| 742 |
+
item_name=req.item_name or "handwoven silk scarf",
|
| 743 |
message=f"You open at {req.opening_price:.0f} rupees.",
|
| 744 |
)
|
| 745 |
|
| 746 |
# AI buyer makes first offer
|
| 747 |
+
client_ip = _client_ip(request)
|
| 748 |
+
action = _ai_buyer_action(obs, req.strategy, session["rng"], client_ip=client_ip)
|
| 749 |
session["round"] = 1
|
| 750 |
session["last_buyer_offer"] = action.price
|
| 751 |
+
sauda_msg = getattr(action, "sauda_message", None) or ""
|
| 752 |
+
sauda_backend = getattr(action, "sauda_backend", None)
|
| 753 |
+
sauda_error = getattr(action, "sauda_error", None)
|
| 754 |
session["history"].append({
|
| 755 |
"round": 0,
|
| 756 |
"actor": "seller",
|
|
|
|
| 762 |
"actor": "buyer",
|
| 763 |
"action": action.action.value if hasattr(action.action, 'value') else action.action,
|
| 764 |
"price": action.price,
|
| 765 |
+
"message": sauda_msg,
|
| 766 |
})
|
| 767 |
|
| 768 |
+
fallback_msg = (
|
| 769 |
f"Buyer offers {action.price:.0f} rupees."
|
| 770 |
if action.action in ("offer", "OFFER", ActionType.OFFER)
|
| 771 |
else f"Buyer {action.action}s."
|
|
|
|
| 775 |
"round": 1,
|
| 776 |
"buyer_action": action.action.value if hasattr(action.action, 'value') else action.action,
|
| 777 |
"buyer_price": action.price,
|
| 778 |
+
"message": sauda_msg or fallback_msg,
|
| 779 |
+
"buyer_message": sauda_msg,
|
| 780 |
"your_opening": req.opening_price,
|
| 781 |
"history": session["history"],
|
| 782 |
"done": False,
|
|
|
|
| 784 |
|
| 785 |
|
| 786 |
@app.post("/seller-mode/step")
|
| 787 |
+
async def seller_mode_step(req: SellerModeStepRequest, request: Request):
|
| 788 |
"""User (as seller) sets counteroffer price. AI buyer responds."""
|
| 789 |
if "seller_mode" not in _envs:
|
| 790 |
raise HTTPException(status_code=400, detail="No seller-mode session. Call /seller-mode/reset first.")
|
|
|
|
| 855 |
}
|
| 856 |
|
| 857 |
# AI buyer responds
|
| 858 |
+
client_ip = _client_ip(request)
|
| 859 |
+
action = _ai_buyer_action(obs, session["strategy"], session["rng"], client_ip=client_ip)
|
| 860 |
|
| 861 |
if action.action in ("accept", ActionType.ACCEPT):
|
| 862 |
session["done"] = True
|
|
|
|
| 866 |
max_surplus = session["buyer_budget"] - session["seller_cost"]
|
| 867 |
buyer_score = max(0, surplus / max_surplus) if max_surplus > 0 else 0
|
| 868 |
|
| 869 |
+
sauda_msg = getattr(action, "sauda_message", None) or ""
|
| 870 |
+
sauda_backend = getattr(action, "sauda_backend", None)
|
| 871 |
session["history"].append({
|
| 872 |
"round": rnd,
|
| 873 |
"actor": "buyer",
|
| 874 |
"action": "accept",
|
| 875 |
"price": seller_price,
|
| 876 |
+
"message": sauda_msg,
|
| 877 |
})
|
| 878 |
|
| 879 |
return {
|
| 880 |
"round": rnd,
|
| 881 |
+
"message": sauda_msg or f"Buyer accepts your price of {seller_price:.0f}! Deal closed.",
|
| 882 |
+
"buyer_message": sauda_msg,
|
| 883 |
"buyer_action": "accept",
|
| 884 |
"buyer_price": seller_price,
|
| 885 |
"done": True,
|
|
|
|
| 893 |
elif action.action in ("walk", ActionType.WALK):
|
| 894 |
session["done"] = True
|
| 895 |
session["outcome"] = "walk"
|
| 896 |
+
sauda_msg = getattr(action, "sauda_message", None) or ""
|
| 897 |
+
sauda_backend = getattr(action, "sauda_backend", None)
|
| 898 |
|
| 899 |
session["history"].append({
|
| 900 |
"round": rnd,
|
| 901 |
"actor": "buyer",
|
| 902 |
"action": "walk",
|
| 903 |
"price": None,
|
| 904 |
+
"message": sauda_msg,
|
| 905 |
})
|
| 906 |
|
| 907 |
return {
|
| 908 |
"round": rnd,
|
| 909 |
+
"message": sauda_msg or "Buyer walks away! No deal.",
|
| 910 |
+
"buyer_message": sauda_msg,
|
| 911 |
"buyer_action": "walk",
|
| 912 |
"buyer_price": None,
|
| 913 |
"done": True,
|
|
|
|
| 917 |
|
| 918 |
else: # offer
|
| 919 |
session["last_buyer_offer"] = action.price
|
| 920 |
+
sauda_msg = getattr(action, "sauda_message", None) or ""
|
| 921 |
+
sauda_backend = getattr(action, "sauda_backend", None)
|
| 922 |
+
sauda_error = getattr(action, "sauda_error", None)
|
| 923 |
session["history"].append({
|
| 924 |
"round": rnd,
|
| 925 |
"actor": "buyer",
|
| 926 |
"action": "offer",
|
| 927 |
"price": action.price,
|
| 928 |
+
"message": sauda_msg,
|
| 929 |
})
|
| 930 |
|
| 931 |
return {
|
| 932 |
"round": rnd,
|
| 933 |
+
"message": sauda_msg or f"Buyer counters with {action.price:.0f} rupees.",
|
| 934 |
+
"buyer_message": sauda_msg,
|
| 935 |
"buyer_action": "offer",
|
| 936 |
"buyer_price": action.price,
|
| 937 |
"done": False,
|
|
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Cost & abuse defenses for the live Sauda HF Inference Endpoint.
|
| 2 |
+
|
| 3 |
+
The /sell page is exposed to the public during the demo window. A bot loop on
|
| 4 |
+
/seller-mode/step would burn HF tokens unbounded. This module gates every HF
|
| 5 |
+
call behind:
|
| 6 |
+
|
| 7 |
+
1. Hard daily call cap (HF only — ollama/rule are unmetered locally).
|
| 8 |
+
2. Per-IP sliding-window rate limit.
|
| 9 |
+
3. Global concurrent-in-flight cap.
|
| 10 |
+
4. Circuit breaker: if HF errors N times in a row, lock to fallback for K min.
|
| 11 |
+
5. Prompt-size cap (anti-prompt-injection ballooning).
|
| 12 |
+
|
| 13 |
+
When a gate trips, we silently downgrade to the next backend (ollama → rule).
|
| 14 |
+
We never tell the user "you've been rate limited" — the UI just sees a slightly
|
| 15 |
+
slower or simpler buyer. The internals are surfaced via /sauda/health for ops.
|
| 16 |
+
|
| 17 |
+
Counters persist to disk (`runs/safety_state.json`) so a restart doesn't reset
|
| 18 |
+
the daily cap and let an attacker get a fresh budget.
|
| 19 |
+
|
| 20 |
+
All gates default to permissive numbers tuned for "live demo, ~50 humans poking
|
| 21 |
+
at it for an hour"; tighten via env-vars for production.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
from __future__ import annotations
|
| 25 |
+
|
| 26 |
+
import json
|
| 27 |
+
import os
|
| 28 |
+
import threading
|
| 29 |
+
import time
|
| 30 |
+
from collections import deque
|
| 31 |
+
from pathlib import Path
|
| 32 |
+
from typing import Any, Optional
|
| 33 |
+
|
| 34 |
+
# ── Tunables ──────────────────────────────────────────────────────────
# Every limit below is read once, at import time, from an environment
# variable; the string default is used when the variable is unset.

# Hard cap on total HF calls per UTC day. Trip → flip to ollama for the rest of
# the day. Default 1500 ≈ $3-5 of a10g-small at typical token sizes.
MAX_HF_CALLS_PER_DAY = int(os.environ.get("SAUDA_HF_MAX_CALLS_PER_DAY", "1500"))

# Per-IP sliding-window. (window_seconds, max_calls) tuples.
IP_LIMITS: list[tuple[int, int]] = [
    (60, int(os.environ.get("SAUDA_RL_PER_MIN", "30"))),
    (3600, int(os.environ.get("SAUDA_RL_PER_HOUR", "200"))),
    (86400, int(os.environ.get("SAUDA_RL_PER_DAY", "500"))),
]

# Max concurrent in-flight HF calls. Excess gets ollama immediately.
MAX_CONCURRENT_HF = int(os.environ.get("SAUDA_MAX_CONCURRENT_HF", "4"))

# Circuit breaker: trip after N consecutive HF errors, stay tripped for K seconds.
CB_ERROR_THRESHOLD = int(os.environ.get("SAUDA_CB_ERRORS", "3"))
CB_COOLDOWN_SEC = int(os.environ.get("SAUDA_CB_COOLDOWN", "300"))

# Reject prompts longer than this many chars (anti-injection ballooning).
MAX_PROMPT_CHARS = int(os.environ.get("SAUDA_MAX_PROMPT_CHARS", "4000"))

# Where the daily counter is persisted between restarts.
STATE_FILE = Path(os.environ.get("SAUDA_SAFETY_STATE", "runs/safety_state.json"))
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# ── Internal state ────────────────────────────────────────────────────

# Lock taken by the public functions before they touch the counters below.
_lock = threading.Lock()

# IP → deque[float timestamps] of that IP's admitted HF calls.
_ip_calls: dict[str, deque[float]] = {}

# Global concurrency counter.
_inflight = 0

# Circuit breaker state: failure streak and the epoch time until which the
# breaker stays open (0.0 means closed).
_consecutive_errors = 0
_cb_open_until: float = 0.0

# Daily counter: { "utc_date": "YYYY-MM-DD", "calls": int }
_daily = {"utc_date": "", "calls": 0}

# Total spend trace for ops (resets on restart, not safety-critical).
_lifetime = {
    "hf_calls": 0,
    "hf_errors": 0,
    "ollama_calls": 0,
    "rule_calls": 0,
    "blocked_daily": 0,
    "blocked_ip": 0,
    "blocked_concurrency": 0,
    "blocked_circuit": 0,
    "blocked_prompt": 0,
}
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def _today() -> str:
    """Return the current UTC calendar date formatted as ``YYYY-MM-DD``."""
    return time.strftime("%Y-%m-%d", time.gmtime())
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def _load_state() -> None:
    """Restore today's HF call count from ``STATE_FILE``, if one is usable.

    The persisted counter is adopted only when the file holds a JSON object
    whose ``utc_date`` equals today's UTC date; anything else (missing file,
    unreadable or malformed content, a counter from a previous day) leaves
    the in-memory ``_daily`` untouched. Loading is best-effort and never
    raises.
    """
    try:
        data = json.loads(STATE_FILE.read_text())
        if isinstance(data, dict) and data.get("utc_date") == _today():
            calls = int(data.get("calls", 0))
            # Update in place so every existing reference to ``_daily`` sees
            # the restored values.
            _daily["utc_date"] = data["utc_date"]
            _daily["calls"] = calls
    except (OSError, ValueError, TypeError):
        # OSError: file missing/unreadable; ValueError: bad JSON, bad text
        # encoding, or non-numeric count; TypeError: count of the wrong type.
        pass
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def _persist_state() -> None:
    """Write the daily counter to ``STATE_FILE`` as JSON (best-effort).

    The data is written to a sibling temporary file and then moved over the
    real file with ``os.replace``, so an interrupted write cannot leave a
    truncated state file behind. Failures are swallowed on purpose: losing a
    persistence write must not break request handling.
    """
    try:
        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        tmp_file = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
        tmp_file.write_text(json.dumps(_daily))
        os.replace(tmp_file, STATE_FILE)
    except (OSError, TypeError, ValueError):
        pass
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
_load_state()
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
# ── Public API ────────────────────────────────────────────────────────
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
class HFCallDenied(Exception):
    """Raised when a safety gate refuses an HF call. Caller should fall back.

    Attributes:
        reason: Human-readable explanation; also the exception message.
        gate: Short identifier of the gate that refused the call.
    """

    def __init__(self, reason: str, gate: str):
        super().__init__(reason)
        self.reason = reason
        self.gate = gate
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def check_prompt_size(text: str) -> None:
    """Reject prompts that are too large to be plausibly normal.

    Args:
        text: The full prompt text that would be sent to HF.

    Raises:
        HFCallDenied: With ``gate="prompt_size"`` when ``text`` is longer
            than ``MAX_PROMPT_CHARS`` characters. The ``blocked_prompt``
            lifetime counter is incremented first.
    """
    if len(text) > MAX_PROMPT_CHARS:
        with _lock:
            _lifetime["blocked_prompt"] += 1
        raise HFCallDenied(
            f"prompt {len(text)} chars > cap {MAX_PROMPT_CHARS}",
            gate="prompt_size",
        )
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def acquire_hf_slot(client_ip: Optional[str] = None) -> None:
    """Check all gates and reserve an in-flight slot for an HF call.

    Gates are evaluated in this order, all under ``_lock``: daily cap,
    circuit breaker, global concurrency, per-IP sliding windows. Only when
    every gate passes is the call recorded (in-flight, daily and lifetime
    counters, plus the caller's per-IP timestamp).

    Caller MUST call `release_hf_slot(success=...)` after the call (in finally).

    Args:
        client_ip: Caller address used for per-IP rate limiting. A falsy
            value skips the per-IP gate entirely.

    Raises:
        HFCallDenied: If any gate trips; ``gate`` names which one.
    """
    global _inflight
    now = time.time()
    today = _today()

    with _lock:
        # 1) Roll over daily counter at UTC midnight.
        if _daily["utc_date"] != today:
            _daily["utc_date"] = today
            _daily["calls"] = 0
            _persist_state()

        # 2) Daily hard cap.
        if _daily["calls"] >= MAX_HF_CALLS_PER_DAY:
            _lifetime["blocked_daily"] += 1
            raise HFCallDenied(
                f"daily HF cap {MAX_HF_CALLS_PER_DAY} reached",
                gate="daily_cap",
            )

        # 3) Circuit breaker.
        if now < _cb_open_until:
            _lifetime["blocked_circuit"] += 1
            raise HFCallDenied(
                f"circuit breaker open for {int(_cb_open_until - now)}s more",
                gate="circuit_breaker",
            )

        # 4) Concurrency.
        if _inflight >= MAX_CONCURRENT_HF:
            _lifetime["blocked_concurrency"] += 1
            raise HFCallDenied(
                f"concurrent in-flight cap {MAX_CONCURRENT_HF} reached",
                gate="concurrency",
            )

        # 5) Per-IP sliding windows.
        if client_ip:
            dq = _ip_calls.setdefault(client_ip, deque())
            # Drop only timestamps that have aged out of the LONGEST window.
            # Pruning against each window's own cutoff would let the shortest
            # window discard entries the longer windows still need to count.
            if IP_LIMITS:
                oldest_needed = now - max(window for window, _limit in IP_LIMITS)
                while dq and dq[0] < oldest_needed:
                    dq.popleft()
            for window_s, max_calls in IP_LIMITS:
                cutoff = now - window_s
                count_in_window = sum(1 for t in dq if t >= cutoff)
                if count_in_window >= max_calls:
                    _lifetime["blocked_ip"] += 1
                    raise HFCallDenied(
                        f"ip {client_ip} hit {max_calls}/{window_s}s",
                        gate=f"ip_rate_{window_s}s",
                    )
            dq.append(now)

        # All gates passed — reserve.
        _inflight += 1
        _daily["calls"] += 1
        _lifetime["hf_calls"] += 1
        # Persist every 10 calls to keep disk writes cheap but bounded.
        if _daily["calls"] % 10 == 0:
            _persist_state()
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def release_hf_slot(success: bool) -> None:
    """Mark an in-flight HF call done. `success` updates the circuit breaker.

    Args:
        success: True when the HF call returned normally. A success clears
            the consecutive-error streak; a failure extends it and, once the
            streak reaches ``CB_ERROR_THRESHOLD``, opens the breaker for
            ``CB_COOLDOWN_SEC`` seconds from now.
    """
    global _inflight, _consecutive_errors, _cb_open_until
    with _lock:
        # Clamp at zero so an unmatched release cannot drive the counter negative.
        _inflight = max(0, _inflight - 1)
        if success:
            _consecutive_errors = 0
        else:
            _consecutive_errors += 1
            _lifetime["hf_errors"] += 1
            if _consecutive_errors >= CB_ERROR_THRESHOLD:
                _cb_open_until = time.time() + CB_COOLDOWN_SEC
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def note_fallback(kind: str) -> None:
    """Track non-HF backend usage (for /sauda/health stats).

    Args:
        kind: ``"ollama"`` or ``"rule"``; any other value is ignored.
    """
    with _lock:
        if kind == "ollama":
            _lifetime["ollama_calls"] += 1
        elif kind == "rule":
            _lifetime["rule_calls"] += 1
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def stats() -> dict[str, Any]:
    """Snapshot of safety state, surfaced via /sauda/health (ops use only).

    Returns:
        A dict built under ``_lock`` holding copies of the daily and lifetime
        counters, the configured caps, the current in-flight count, the
        circuit-breaker status, and the number of IPs being tracked.
    """
    with _lock:
        now = time.time()
        return {
            "daily": dict(_daily),
            "daily_cap": MAX_HF_CALLS_PER_DAY,
            "inflight": _inflight,
            "concurrency_cap": MAX_CONCURRENT_HF,
            "circuit_breaker_open": now < _cb_open_until,
            "circuit_breaker_open_for_s": max(0, int(_cb_open_until - now)),
            "consecutive_errors": _consecutive_errors,
            "lifetime": dict(_lifetime),
            "ip_limits": [{"window_s": w, "max_calls": n} for w, n in IP_LIMITS],
            "tracked_ips": len(_ip_calls),
        }
|
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Live Sauda buyer endpoints for the /sell page.
|
| 2 |
+
|
| 3 |
+
Two backends:
|
| 4 |
+
- "hf" — POST to a Hugging Face Inference Endpoint (production)
|
| 5 |
+
- "ollama" — POST to a local ollama server (fallback / dev)
|
| 6 |
+
|
| 7 |
+
Configuration via env-vars:
|
| 8 |
+
SAUDA_BACKEND — "hf" (default), "ollama", or "rule" (skip LLM)
|
| 9 |
+
SAUDA_HF_URL — full HF Inference Endpoint URL, e.g.
|
| 10 |
+
"https://abc123.us-east-1.aws.endpoints.huggingface.cloud"
|
| 11 |
+
SAUDA_HF_TOKEN — HF token with read access to the endpoint
|
| 12 |
+
SAUDA_OLLAMA_URL — ollama base URL (default http://localhost:11434)
|
| 13 |
+
SAUDA_OLLAMA_MODEL — ollama tag (default "bestdealbot")
|
| 14 |
+
|
| 15 |
+
Both paths render the buyer's observation through the same prompt the eval
|
| 16 |
+
harness uses (DEFAULT_SYSTEM_PROMPT + format_observation), parse the action
|
| 17 |
+
via parse_action, and apply the same Bayesian seller-tell steering as the
|
| 18 |
+
v2 evaluation runs. Result: the /sell page sees the exact same buyer the
|
| 19 |
+
research numbers are based on, just exposed over HTTP instead of in-process.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import json
|
| 25 |
+
import os
|
| 26 |
+
import re
|
| 27 |
+
from typing import Any, Optional
|
| 28 |
+
|
| 29 |
+
import requests
|
| 30 |
+
|
| 31 |
+
from bazaarbot_env import (
|
| 32 |
+
DEFAULT_SYSTEM_PROMPT,
|
| 33 |
+
format_observation,
|
| 34 |
+
parse_action,
|
| 35 |
+
steer_bayesian_action,
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
from .safety import (
|
| 39 |
+
HFCallDenied,
|
| 40 |
+
acquire_hf_slot,
|
| 41 |
+
check_prompt_size,
|
| 42 |
+
note_fallback,
|
| 43 |
+
release_hf_slot,
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# ── Helpers ──────────────────────────────────────────────────────────
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def _build_prompt(obs_dict: dict[str, Any]) -> tuple[str, str]:
    """Return (system, user) messages for chat-style backends.

    The system message is ``DEFAULT_SYSTEM_PROMPT``; the user message is
    ``format_observation(obs_dict)``.
    """
    return DEFAULT_SYSTEM_PROMPT, format_observation(obs_dict)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _post_json(url: str, payload: dict[str, Any], headers: dict[str, str], timeout: int = 30) -> dict:
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON reply.

    Args:
        url: Target URL.
        payload: Body, sent via the ``json=`` argument of ``requests.post``.
        headers: HTTP headers to send.
        timeout: Timeout in seconds handed to ``requests.post``.

    Raises:
        requests.HTTPError: Via ``raise_for_status`` on a 4xx/5xx reply.
        Other ``requests`` exceptions for transport or JSON decoding failures.
    """
    resp = requests.post(url, json=payload, headers=headers, timeout=timeout)
    resp.raise_for_status()
    return resp.json()
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# ── HF Inference Endpoint backend ────────────────────────────────────
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _hf_chat(system: str, user: str, *, max_new_tokens: int = 96, temperature: float = 0.6) -> str:
    """POST to a HF Inference Endpoint serving a text-generation model.

    Endpoints accept either OpenAI-compatible chat completions OR HF native
    text-generation payloads depending on how they're deployed. We send the
    OpenAI-compatible shape first since modern HF endpoints support it, and
    retry with the native shape if that attempt fails for any reason.

    Args:
        system: System prompt.
        user: User message.
        max_new_tokens: Generation length cap sent to the endpoint.
        temperature: Sampling temperature sent to the endpoint.

    Returns:
        The generated text.

    Raises:
        RuntimeError: If the URL or token env-vars are missing, or the native
            reply has an unrecognized shape.
        Exception: Whatever the native request raises; the earlier
            chat-completions failure is attached as its ``__cause__``.
    """
    url = os.environ.get("SAUDA_HF_URL", "").rstrip("/")
    token = os.environ.get("SAUDA_HF_TOKEN") or os.environ.get("HF_TOKEN")
    if not url or not token:
        raise RuntimeError("SAUDA_HF_URL and SAUDA_HF_TOKEN must be set")

    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

    # Try OpenAI-compatible chat completions endpoint first
    chat_url = url + "/v1/chat/completions"
    chat_payload = {
        "messages": [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        "max_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": 0.9,
    }
    try:
        data = _post_json(chat_url, chat_payload, headers)
        return data["choices"][0]["message"]["content"]
    except Exception as exc:
        # Deliberately broad: any failure here just means "try the native
        # shape". Keep the error so it is not lost if that fails as well.
        chat_error = exc

    # Fall back to HF native text-generation
    payload = {
        "inputs": f"{system}\n\n{user}\n",
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": 0.9,
            "return_full_text": False,
        },
    }
    try:
        data = _post_json(url, payload, headers)
    except Exception as native_error:
        # Same exception type as before, now carrying the first failure.
        raise native_error from chat_error

    if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
        return data[0]["generated_text"]
    if isinstance(data, dict) and "generated_text" in data:
        return data["generated_text"]
    raise RuntimeError(
        f"Unexpected HF endpoint response shape: {str(data)[:200]}"
    ) from chat_error
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
# ── Ollama backend ───────────────────────────────────────────────────
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _ollama_chat(system: str, user: str, *, max_new_tokens: int = 96, temperature: float = 0.6) -> str:
    """POST to a local ollama server.

    The base URL comes from ``SAUDA_OLLAMA_URL`` and the model tag from
    ``SAUDA_OLLAMA_MODEL``. The request is non-streaming with a 60 second
    timeout.

    Args:
        system: System prompt.
        user: User message.
        max_new_tokens: Sent as the ``num_predict`` option.
        temperature: Sent as the ``temperature`` option.

    Returns:
        The reply's ``message.content``, or ``""`` when it is absent or null.
    """
    host = os.environ.get("SAUDA_OLLAMA_URL", "http://localhost:11434").rstrip("/")
    model = os.environ.get("SAUDA_OLLAMA_MODEL", "bestdealbot")

    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        "stream": False,
        "options": {
            "temperature": temperature,
            "top_p": 0.9,
            "num_predict": max_new_tokens,
        },
    }
    data = _post_json(f"{host}/api/chat", payload, {}, timeout=60)
    # ``or`` guards against an explicit null for either key in the reply.
    message = data.get("message") or {}
    return message.get("content") or ""
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# ── Public entrypoint ────────────────────────────────────────────────
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def sauda_action(
    obs_dict: dict[str, Any],
    *,
    backend: Optional[str] = None,
    use_steering: bool = True,
    client_ip: Optional[str] = None,
) -> dict[str, Any]:
    """Get a buyer action from Sauda v2.

    Returns dict with keys: action ("offer"|"accept"|"walk"), price (float|None),
    message (str), backend (str echoing which path served), error (str if any).

    Backend failures never propagate — the call falls back through
    HF → ollama → rule. The /sell page is interactive and a 500 mid-demo is
    worse than a dumb fallback.

    Safety gates (rate/spend/concurrency caps) wrap the HF path; if any trips,
    we silently downgrade to ollama and don't tell the user. `client_ip` is
    used for per-IP rate-limiting; pass None for trusted server-internal calls.

    Args:
        obs_dict: Buyer observation; rendered into the prompt and also read
            directly by the rule-based fallback.
        backend: "hf", "ollama" or "rule". Defaults to the ``SAUDA_BACKEND``
            env-var, then "hf".
        use_steering: When true, the action is passed through
            ``steer_bayesian_action``; a failure there leaves it unchanged.
        client_ip: Forwarded to ``acquire_hf_slot``.
    """
    chosen = (backend or os.environ.get("SAUDA_BACKEND") or "hf").lower()
    system, user = _build_prompt(obs_dict)

    text = ""
    err: Optional[str] = None
    served_by = chosen

    def _try_hf() -> str:
        """HF path with safety gates. Raises on any failure (caller falls back)."""
        check_prompt_size(system + user)
        acquire_hf_slot(client_ip=client_ip)
        ok = False
        try:
            reply = _hf_chat(system, user)
            ok = True
            return reply
        finally:
            # Always give the slot back; ``ok`` feeds the circuit breaker.
            release_hf_slot(success=ok)

    def _try_ollama() -> str:
        """Ollama path; counted in the fallback stats only when it succeeds."""
        reply = _ollama_chat(system, user)
        note_fallback("ollama")
        return reply

    try:
        if chosen == "hf":
            text = _try_hf()
        elif chosen == "ollama":
            text = _try_ollama()
        elif chosen == "rule":
            note_fallback("rule")
            text = ""  # forces fallback path below
        else:
            raise RuntimeError(f"unknown SAUDA_BACKEND: {chosen}")
    except HFCallDenied as e:
        # Safety gate tripped. Silently downgrade to ollama; if that fails too,
        # the rule-based fallback below kicks in.
        err = f"hf gated ({e.gate}); using ollama"
        served_by = "ollama"
        try:
            text = _try_ollama()
        except Exception as e2:
            err = f"hf gated ({e.gate}); ollama also failed: {type(e2).__name__}"
            served_by = "rule"
            note_fallback("rule")
    except Exception as e:
        err = f"{chosen} backend failed: {type(e).__name__}: {str(e)[:160]}"
        served_by = "ollama" if chosen == "hf" else f"{chosen}+fallback"
        # If primary was HF, try ollama before giving up.
        if chosen == "hf":
            try:
                text = _try_ollama()
            except Exception as e2:
                err = f"hf failed; ollama also failed: {type(e2).__name__}"
                served_by = "rule"
                note_fallback("rule")

    if text:
        # Price used by parse_action when the model output has no usable one.
        fallback_price = float(obs_dict.get("own_private_budget") or 100) * 0.3
        action = parse_action(text, fallback_price=fallback_price)
        action.pop("_parse_error", None)
    else:
        # Conservative rule-based fallback: open at 35% of the ask, then close
        # a quarter of the remaining gap to the ask on each later offer.
        ask = float(obs_dict.get("seller_asking_price") or obs_dict.get("opponent_last_offer") or 100)
        last = obs_dict.get("own_last_offer")
        if last is None:
            price = round(ask * 0.35, 2)
        else:
            price = round(float(last) + (ask - float(last)) * 0.25, 2)
        action = {"action": "offer", "price": price, "message": ""}

    if use_steering:
        try:
            action = steer_bayesian_action(obs_dict, action)
        except Exception:
            # Steering is an optional refinement; keep the unsteered action.
            pass

    out: dict[str, Any] = {
        "action": str(action.get("action", "offer")),
        "price": action.get("price"),
        "message": action.get("message") or "",
        "backend": served_by,
    }
    if err:
        out["error"] = err
    return out
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def health() -> dict[str, Any]:
    """Quick reachability probe for both backends. Used by /sauda/health.

    Returns:
        A dict with the configured backend and ollama settings, plus:
        ``hf_ok``/``hf_status`` or ``hf_error`` (only when HF is configured),
        ``ollama_ok`` with ``ollama_has_model`` or ``ollama_error``, and
        ``safety`` (the safety-module stats) when available. Probe failures
        are reported in the dict, never raised.
    """
    out: dict[str, Any] = {
        "active_backend": (os.environ.get("SAUDA_BACKEND") or "hf").lower(),
        "hf_configured": bool(os.environ.get("SAUDA_HF_URL")) and bool(
            os.environ.get("SAUDA_HF_TOKEN") or os.environ.get("HF_TOKEN")
        ),
        "ollama_url": os.environ.get("SAUDA_OLLAMA_URL", "http://localhost:11434"),
        "ollama_model": os.environ.get("SAUDA_OLLAMA_MODEL", "bestdealbot"),
    }
    # Probe HF (skip if not configured)
    if out["hf_configured"]:
        try:
            url = os.environ["SAUDA_HF_URL"].rstrip("/")
            token = os.environ.get("SAUDA_HF_TOKEN") or os.environ["HF_TOKEN"]
            r = requests.get(url + "/health", headers={"Authorization": f"Bearer {token}"}, timeout=5)
            # Any non-5xx reply counts as the endpoint being reachable.
            out["hf_ok"] = r.status_code < 500
            out["hf_status"] = r.status_code
        except Exception as e:
            out["hf_ok"] = False
            out["hf_error"] = f"{type(e).__name__}: {str(e)[:120]}"
    # Probe Ollama
    try:
        # Strip a trailing slash, as _ollama_chat does, so the probe path
        # never contains a double slash.
        host = out["ollama_url"].rstrip("/")
        r = requests.get(f"{host}/api/tags", timeout=3)
        out["ollama_ok"] = r.status_code == 200
        if r.status_code == 200:
            tags = [m.get("name", "") for m in r.json().get("models", [])]
            # Prefix match as well as exact match on the model name.
            out["ollama_has_model"] = out["ollama_model"] in tags or any(
                t.startswith(out["ollama_model"]) for t in tags
            )
    except Exception as e:
        out["ollama_ok"] = False
        out["ollama_error"] = f"{type(e).__name__}: {str(e)[:120]}"
    # Safety / spend stats (ops use only — don't expose details to UI).
    try:
        from .safety import stats as _safety_stats
        out["safety"] = _safety_stats()
    except Exception:
        pass
    return out
|