paymybills committed on
Commit
2d8d503
·
1 Parent(s): 8a342b0

Sync server, bazaarbot_env, and nlp modules from main

Browse files

Brings in 25+ commits of server-side improvements that have been
landing in the GitHub repo since the Space last synced on Apr 16:

- Live HF Inference Endpoint backend (server/sauda_buyer.py) with
Ollama fallback, dual-backend health probe.
- Safety module: per-IP rate limit, daily cap, concurrency cap,
circuit breaker, prompt-size cap (server/safety.py).
- /highlight endpoint for span-level seller-tell extraction
(nlp/keyword_patterns.py with English numeric deception cues
added today).
- LLMSeller persona implementation (bazaarbot_env/llm_seller.py)
using Gemma-4-E4B; auto-accept-when-offer >= reservation;
monotonic counter logic.
- gym_wrapper steering with monotonicity guard fix from today
(max(own_last_offer, ...) shape so ceiling regression can't drag
the buyer backward).

Updates Dockerfile to COPY bazaarbot_env/ and nlp/ alongside server/.

Dockerfile CHANGED
@@ -6,6 +6,8 @@ COPY requirements.txt .
6
  RUN pip install --no-cache-dir -r requirements.txt
7
 
8
  COPY server/ ./server/
 
 
9
  COPY openenv.yaml .
10
 
11
  RUN mkdir -p /app/data
 
6
  RUN pip install --no-cache-dir -r requirements.txt
7
 
8
  COPY server/ ./server/
9
+ COPY bazaarbot_env/ ./bazaarbot_env/
10
+ COPY nlp/ ./nlp/
11
  COPY openenv.yaml .
12
 
13
  RUN mkdir -p /app/data
bazaarbot_env/__init__.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Standalone, training-ready BazaarBot negotiation environment.
2
+
3
+ This package is a thin re-export of the core env (`models`, `seller`,
4
+ `environment`, `tasks`) plus a training-oriented wrapper:
5
+
6
+ from bazaarbot_env import BazaarGymEnv, rollout_episode
7
+
8
+ It is importable without FastAPI, uvicorn, or any of the serving stack —
9
+ designed to vendor cleanly into a Kaggle notebook or standalone training job.
10
+
11
+ Usage:
12
+ env = BazaarGymEnv(task_name="single_deal", seed=42)
13
+ obs, _ = env.reset()
14
+ while not env.done:
15
+ action = policy(obs) # policy returns dict: {"action": ..., "price": ...}
16
+ obs, reward, done, info = env.step(action)
17
+
18
+ For GRPO-style training over multiple rollouts, use `rollout_episode`.
19
+ """
20
+
21
+ from .models import (
22
+ ActionType,
23
+ BazaarAction,
24
+ BazaarObservation,
25
+ BazaarReward,
26
+ CareerHistory,
27
+ DealOutcome,
28
+ DealRecord,
29
+ EnvironmentState,
30
+ SellerPersonalityType,
31
+ TaskConfig,
32
+ TellObservation,
33
+ )
34
+ from .environment import BazaarEnvironment
35
+ from .seller import SellerPersonality, SellerState, SellerTell
36
+ from .tasks import GRADERS, TASKS
37
+ from .gym_wrapper import (
38
+ DEFAULT_SYSTEM_PROMPT,
39
+ BazaarGymEnv,
40
+ format_observation,
41
+ parse_action,
42
+ rollout_episode,
43
+ steer_bayesian_action,
44
+ strip_think_tags,
45
+ )
46
+
47
+ __all__ = [
48
+ "ActionType",
49
+ "BazaarAction",
50
+ "BazaarEnvironment",
51
+ "BazaarGymEnv",
52
+ "BazaarObservation",
53
+ "BazaarReward",
54
+ "CareerHistory",
55
+ "DealOutcome",
56
+ "DealRecord",
57
+ "DEFAULT_SYSTEM_PROMPT",
58
+ "EnvironmentState",
59
+ "GRADERS",
60
+ "SellerPersonality",
61
+ "SellerPersonalityType",
62
+ "SellerState",
63
+ "SellerTell",
64
+ "TASKS",
65
+ "TaskConfig",
66
+ "TellObservation",
67
+ "format_observation",
68
+ "parse_action",
69
+ "rollout_episode",
70
+ "steer_bayesian_action",
71
+ "strip_think_tags",
72
+ ]
bazaarbot_env/environment.py ADDED
@@ -0,0 +1,543 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Core BazaarBot negotiation environment."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ import math
7
+ import random
8
+ from typing import Optional
9
+
10
+ from .models import (
11
+ ActionType,
12
+ BazaarAction,
13
+ BazaarObservation,
14
+ BazaarReward,
15
+ CareerHistory,
16
+ DealOutcome,
17
+ DealRecord,
18
+ EnvironmentState,
19
+ SellerPersonalityType,
20
+ TaskConfig,
21
+ TellObservation,
22
+ )
23
+ from .seller import SellerPersonality, SellerState, SellerTell
24
+
25
+
26
+ def _tell_to_model(
27
+ tell: SellerTell | None,
28
+ message: str = "",
29
+ history: list[str] | None = None,
30
+ use_nlp: bool = True,
31
+ ) -> TellObservation | None:
32
+ if tell is None:
33
+ return None
34
+
35
+ # NLP layer: extract language-based signals from the seller utterance.
36
+ # Rule-based body-language tells (fidget, posture, eye_contact) are kept
37
+ # from seller.py β€” NLP fills verbal and condition dimensions.
38
+ nlp_verbal: dict = {}
39
+ nlp_condition: dict = {}
40
+ if use_nlp and message:
41
+ try:
42
+ from nlp.extractor import TellExtractor
43
+ _extractor = TellExtractor()
44
+ extracted = _extractor.extract(message, history=history, fast=False)
45
+ nlp_verbal = {
46
+ "verbal_urgency": extracted["verbal_urgency"],
47
+ "verbal_confidence": extracted["verbal_confidence"],
48
+ "verbal_deception_cue": extracted["verbal_deception_cue"],
49
+ "emotional_escalation": extracted["emotional_escalation"],
50
+ "offer_speed": extracted["offer_speed"],
51
+ "concession_pattern": extracted["concession_pattern"],
52
+ }
53
+ nlp_condition = {
54
+ "condition_score": extracted["condition_score"],
55
+ "depreciation_score": extracted["depreciation_score"],
56
+ "condition_label": extracted["condition_label"],
57
+ }
58
+ except Exception:
59
+ pass # extractor unavailable or Ollama down β€” fall back to rule-based
60
+
61
+ # Blend: NLP verbal signals averaged with rule-based where both exist.
62
+ # Rule-based is ground truth for non-verbal (fidget, posture, eye_contact).
63
+ # NLP takes precedence for condition since rule code has no condition signal.
64
+ def _blend(rule_val: float, nlp_val: float | None, nlp_weight: float = 0.55) -> float:
65
+ if nlp_val is None:
66
+ return rule_val
67
+ return round(rule_val * (1 - nlp_weight) + nlp_val * nlp_weight, 3)
68
+
69
+ return TellObservation(
70
+ verbal_urgency=_blend(tell.verbal_urgency, nlp_verbal.get("verbal_urgency")),
71
+ verbal_confidence=_blend(tell.verbal_confidence, nlp_verbal.get("verbal_confidence")),
72
+ verbal_deception_cue=_blend(tell.verbal_deception_cue, nlp_verbal.get("verbal_deception_cue")),
73
+ price_rounding=tell.price_rounding,
74
+ offer_speed=nlp_verbal.get("offer_speed", tell.offer_speed),
75
+ concession_pattern=nlp_verbal.get("concession_pattern", tell.concession_pattern),
76
+ fidget_level=round(tell.fidget_level, 3),
77
+ eye_contact=tell.eye_contact,
78
+ posture=tell.posture,
79
+ repeat_phrases=tell.repeat_phrases,
80
+ topic_changes=tell.topic_changes,
81
+ emotional_escalation=_blend(tell.emotional_escalation, nlp_verbal.get("emotional_escalation")),
82
+ condition_score=nlp_condition.get("condition_score", 1.0),
83
+ depreciation_score=nlp_condition.get("depreciation_score", 0.0),
84
+ condition_label=nlp_condition.get("condition_label", "unknown"),
85
+ )
86
+
87
+
88
class BazaarEnvironment:
    """Negotiation environment implementing step/reset/state.

    Lifecycle: construct with a ``TaskConfig``, call ``reset()`` to start an
    episode (the seller opens at its anchor price), then call ``step()`` with
    buyer actions until the returned observation has ``done=True``. For
    multi-episode tasks call ``reset()`` again; ``all_episodes_done`` reports
    when the final episode has concluded.
    """

    def __init__(self, task: TaskConfig, seed: Optional[int] = None):
        """Create the environment.

        Args:
            task: Task configuration (budget, seller parameters, flags).
            seed: Seed for the private RNG. When given, the module-level
                ``random`` generator is seeded with it as well.
        """
        self.task = task
        self.rng = random.Random(seed)
        if seed is not None:
            random.seed(seed)

        # Episode tracking
        self.current_episode = 0
        self.total_episodes = task.total_episodes
        self.career_history = CareerHistory()

        # Per-episode state
        self.seller: Optional[SellerState] = None
        self.current_round = 0
        self.done = False
        self.buyer_budget = task.buyer_budget
        self.remaining_bankroll = task.buyer_budget * task.total_episodes
        self.offer_history: list[dict] = []
        self.cumulative_reward = 0.0
        self.step_rewards: list[float] = []
        self.tells_history: list[TellObservation] = []

        # Stalling detection
        self._repeated_offers = 0
        self._last_buyer_offer: Optional[float] = None

        # Episode results for career grading
        self.episode_results: list[DealRecord] = []

        # Snapshots for counterfactual replay, keyed by round number
        self._snapshots: dict[int, dict] = {}

        # Items for variety (used when real listings are disabled)
        self._items = [
            "handwoven silk scarf", "brass table lamp", "leather messenger bag",
            "ceramic tea set", "sandalwood incense box", "hand-painted pottery",
            "embroidered cushion cover", "copper water bottle", "jute tote bag",
            "wooden chess set",
        ]

        # Name of the item being negotiated in the current episode; set by
        # reset() so later observations agree with the opening observation.
        self._current_item = "item"

    def _snapshot(self):
        """Save a snapshot of environment state for counterfactual replay.

        NOTE(review): ``tells_history``, ``remaining_bankroll``,
        ``career_history`` and ``episode_results`` are not captured, so
        restoring after a concluded round leaves those as they were —
        confirm whether replay callers rely on that.
        """
        self._snapshots[self.current_round] = {
            "seller": copy.deepcopy(self.seller),
            "offer_history": copy.deepcopy(self.offer_history),
            "done": self.done,
            "cumulative_reward": self.cumulative_reward,
            "step_rewards": list(self.step_rewards),
            "repeated_offers": self._repeated_offers,
            "last_buyer_offer": self._last_buyer_offer,
            "current_round": self.current_round,
        }

    def restore_snapshot(self, round_num: int) -> bool:
        """Restore environment to state at given round. Returns False if no snapshot."""
        snap = self._snapshots.get(round_num)
        if snap is None:
            return False
        # Deep-copy again so the stored snapshot can be restored repeatedly.
        self.seller = copy.deepcopy(snap["seller"])
        self.offer_history = copy.deepcopy(snap["offer_history"])
        self.done = snap["done"]
        self.cumulative_reward = snap["cumulative_reward"]
        self.step_rewards = list(snap["step_rewards"])
        self._repeated_offers = snap["repeated_offers"]
        self._last_buyer_offer = snap["last_buyer_offer"]
        self.current_round = snap["current_round"]
        return True

    def reset(self) -> BazaarObservation:
        """Start the next episode and return the seller's opening observation."""
        self.current_episode += 1
        self.current_round = 0
        self.done = False
        self.offer_history = []
        self.step_rewards = []
        self.tells_history = []
        self._repeated_offers = 0
        self._last_buyer_offer = None
        self._snapshots = {}

        # Map personality enum
        personality = SellerPersonality(self.task.seller_personality.value)

        # Per-episode listing: sample from real dataset when enabled, else
        # fall back to the task's static cost/budget + hardcoded items list.
        listing = None
        if self.task.use_real_listings:
            from .listings import sample_listing
            listing = sample_listing(self.rng)

        if listing is not None:
            episode_cost = listing["seller_cost"]
            episode_anchor = listing["seller_anchor"]
            self.buyer_budget = listing["buyer_budget"]
            item = listing["name"]
        else:
            episode_cost = self.task.seller_cost
            episode_anchor = self.task.seller_cost * self.task.seller_anchor_multiplier
            item = self._items[(self.current_episode - 1) % len(self._items)]
        self._current_item = item

        # Single-episode tasks get the whole step budget; career tasks split
        # it evenly across episodes.
        if self.task.total_episodes == 1:
            rounds_for_episode = self.task.max_steps
        else:
            rounds_for_episode = self.task.max_steps // self.task.total_episodes

        # Create seller for this episode
        self.seller = SellerState(
            cost=episode_cost,
            anchor=episode_anchor,
            base_concession_rate=self.task.seller_concession_rate,
            inventory=self.task.seller_inventory,
            initial_inventory=self.task.seller_inventory,
            batna_probability=self.task.seller_batna_probability,
            max_rounds=rounds_for_episode,
            personality=personality,
            _rng=self.rng,
        )

        # Career mode: update seller with buyer history
        if self.task.enable_career and self.career_history.deals:
            self.seller.update_career_info(self.career_history.capitulation_rate)

        from .seller import _pick_message
        # Use this episode's cost (not the task's static one) so the opening
        # line agrees with the seller actually created above.
        open_msg = _pick_message(
            personality, "open", self.rng,
            item=item, price=self.seller.anchor, cost=episode_cost,
        )

        obs = BazaarObservation(
            current_round=0,
            max_rounds=self.seller.max_rounds,
            own_last_offer=None,
            opponent_last_offer=self.seller.anchor,
            own_private_deadline=self.task.buyer_deadline,
            own_private_budget=self.buyer_budget,
            rounds_remaining=self.seller.max_rounds,
            seller_last_move_delta=None,
            item_name=item,
            seller_asking_price=self.seller.anchor,
            seller_personality=self.task.seller_personality,
            episode_number=self.current_episode,
            total_episodes=self.total_episodes,
            career_history=self.career_history if self.task.enable_career else None,
            done=False,
            message=f'Seller opens: "{open_msg}"',
        )

        self.offer_history.append({
            "round": 0,
            "actor": "seller",
            "action": "open",
            "price": self.seller.anchor,
        })

        self._snapshot()
        return obs

    def step(self, action: BazaarAction) -> tuple[BazaarObservation, BazaarReward]:
        """Process buyer action and return new observation + reward.

        Offers are validated first: a missing price defaults to half the
        budget, an out-of-range price is clamped into ``[0, budget]`` with a
        -0.2 penalty, and the third consecutive near-identical offer (within
        0.5) onwards costs -0.1. Note that ``action.price`` is updated in
        place when it is defaulted or clamped.
        """
        if self.done:
            obs = self._make_obs(message="Negotiation already concluded.")
            obs.done = True
            return obs, BazaarReward(reward=0.0, terminal=True)

        # Snapshot the pre-action state under the current round number.
        self._snapshot()
        self.current_round += 1
        reward_components: dict[str, float] = {}
        penalty = 0.0

        # Validate action
        if action.action == ActionType.OFFER:
            if action.price is None:
                action.price = self.buyer_budget * 0.5
            if action.price < 0 or action.price > self.buyer_budget:
                penalty -= 0.2
                reward_components["out_of_range_penalty"] = -0.2
                action.price = max(0, min(action.price, self.buyer_budget))

            if self._last_buyer_offer is not None and abs(action.price - self._last_buyer_offer) < 0.5:
                self._repeated_offers += 1
                if self._repeated_offers >= 3:
                    penalty -= 0.1
                    reward_components["stalling_penalty"] = -0.1
            else:
                self._repeated_offers = 0
            self._last_buyer_offer = action.price

        # Record buyer action
        self.offer_history.append({
            "round": self.current_round,
            "actor": "buyer",
            "action": action.action.value,
            "price": action.price,
        })

        # Process action
        if action.action == ActionType.WALK:
            return self._handle_walk(reward_components, penalty)
        elif action.action == ActionType.ACCEPT:
            return self._handle_accept(reward_components, penalty)
        else:
            return self._handle_offer(action.price, reward_components, penalty)

    def _handle_walk(self, components: dict, penalty: float) -> tuple[BazaarObservation, BazaarReward]:
        """End the episode because the buyer walked away (-0.3 plus penalties)."""
        self.done = True
        walk_penalty = -0.3
        components["walk_penalty"] = walk_penalty
        total = walk_penalty + penalty

        self._record_deal(DealOutcome.WALK, None, self.current_round)

        obs = self._make_obs(message="You walk away from the deal.")
        obs.done = True
        obs.deal_outcome = DealOutcome.WALK

        reward = BazaarReward(reward=total, terminal=True, components=components)
        self.step_rewards.append(total)
        self.cumulative_reward += total
        return obs, reward

    def _handle_accept(self, components: dict, penalty: float) -> tuple[BazaarObservation, BazaarReward]:
        """Accept the seller's current offer, or penalise an accept with nothing to accept."""
        if self.seller is None or not self.seller.offer_history:
            obs = self._make_obs(message="No seller offer to accept yet. Make an offer first.")
            reward = BazaarReward(reward=-0.1 + penalty, terminal=False, components={"invalid_accept": -0.1})
            self.step_rewards.append(reward.reward)
            self.cumulative_reward += reward.reward
            return obs, reward

        agreed_price = self.seller.current_offer
        return self._finalize_deal(agreed_price, components, penalty, buyer_accepted=True)

    def _handle_offer(self, price: float, components: dict, penalty: float) -> tuple[BazaarObservation, BazaarReward]:
        """Let the seller respond to a buyer offer (accept, walk, or counter).

        ``components`` arrives from ``step`` already holding any validation
        penalties, and ``penalty`` is their sum. The step reward is therefore
        built from ``penalty`` plus only the terms added here, so each
        validation penalty is counted exactly once — matching the walk and
        deal paths.
        """
        assert self.seller is not None

        seller_action, seller_price, tell, msg = self.seller.respond(price, self.current_round)

        # Build conversation history for NLP context (last 4 turns)
        recent_history = [
            f"{h['actor']}: {h.get('price', '')}" for h in self.offer_history[-4:]
        ]

        # Record tell — NLP layer blends language signals into rule-based tells
        use_nlp = getattr(self.task, "enable_nlp", False)
        tell_model = _tell_to_model(tell, message=msg, history=recent_history, use_nlp=use_nlp)
        if tell_model and self.task.enable_tells:
            self.tells_history.append(tell_model)
        visible_tells = tell_model if self.task.enable_tells else None

        if seller_action == "accept":
            self.offer_history.append({
                "round": self.current_round,
                "actor": "seller",
                "action": "accept",
                "price": price,
            })
            return self._finalize_deal(price, components, penalty, buyer_accepted=False, message=msg)

        if seller_action == "walk":
            self.done = True
            components["seller_walked"] = -0.2
            self._record_deal(DealOutcome.WALK, None, self.current_round)

            obs = self._make_obs(message=f'Seller: "{msg}"')
            obs.done = True
            obs.deal_outcome = DealOutcome.WALK
            obs.tells = visible_tells

            total = -0.2 + penalty
            reward = BazaarReward(reward=total, terminal=True, components=components)
            self.step_rewards.append(total)
            self.cumulative_reward += total
            return obs, reward

        # Counter-offer
        self.offer_history.append({
            "round": self.current_round,
            "actor": "seller",
            "action": "counter",
            "price": seller_price,
        })

        # Partial progress reward: a small bonus when the buyer/seller gap
        # shrank versus the previous exchange, scaled by the anchor price.
        gap_bonus = 0.0
        initial_gap = self.seller.anchor - 0
        current_gap = abs(seller_price - price)
        if len(self.offer_history) >= 4:
            prev_seller = [h["price"] for h in self.offer_history if h["actor"] == "seller" and h["price"] is not None]
            prev_buyer = [h["price"] for h in self.offer_history if h["actor"] == "buyer" and h["price"] is not None]
            if len(prev_seller) >= 2 and len(prev_buyer) >= 2:
                old_gap = abs(prev_seller[-2] - prev_buyer[-2])
                gap_reduction = old_gap - current_gap
                if gap_reduction > 0 and initial_gap > 0:
                    gap_bonus = round(0.05 * (gap_reduction / initial_gap), 4)
                    components["gap_narrowing"] = gap_bonus

        # Check if max rounds exceeded
        rounds_per_ep = self.seller.max_rounds
        if self.current_round >= rounds_per_ep:
            self.done = True
            self._record_deal(DealOutcome.EXPIRED, None, self.current_round)

            obs = self._make_obs(message="Time's up. No deal reached.")
            obs.done = True
            obs.deal_outcome = DealOutcome.EXPIRED
            obs.tells = visible_tells
            expired_penalty = -0.15
            components["expired_penalty"] = expired_penalty
            total = gap_bonus + expired_penalty + penalty
            reward = BazaarReward(reward=total, terminal=True, components=components)
            self.step_rewards.append(total)
            self.cumulative_reward += total
            return obs, reward

        # Seller delta: how far the seller came down since its previous price
        seller_delta = None
        seller_offers = [h["price"] for h in self.offer_history if h["actor"] == "seller" and h["price"] is not None]
        if len(seller_offers) >= 2:
            seller_delta = round(seller_offers[-2] - seller_offers[-1], 2)

        total = gap_bonus + penalty
        obs = self._make_obs(message=f'Seller: "{msg}"')
        obs.opponent_last_offer = seller_price
        obs.own_last_offer = price
        obs.seller_last_move_delta = seller_delta
        obs.rounds_remaining = rounds_per_ep - self.current_round
        obs.tells = visible_tells

        reward = BazaarReward(reward=total, terminal=False, components=components)
        self.step_rewards.append(total)
        self.cumulative_reward += total
        return obs, reward

    def _finalize_deal(
        self, agreed_price: float, components: dict, penalty: float,
        buyer_accepted: bool, message: str | None = None,
    ) -> tuple[BazaarObservation, BazaarReward]:
        """Close the episode with a deal and compute the terminal reward.

        Reward is the buyer's normalized surplus (clamped to [0, 1]) times a
        time discount ``exp(-0.3 * exp(2.5 * t))`` where ``t`` is the fraction
        of rounds used, plus any reputation leak and validation penalty, with
        the total clamped to [0, 1].
        """
        self.done = True
        assert self.seller is not None

        budget = self.buyer_budget
        cost = self.seller.cost
        surplus = budget - agreed_price
        max_surplus = budget - cost
        normalized_surplus = surplus / max_surplus if max_surplus > 0 else 0
        normalized_surplus = max(0, min(1, normalized_surplus))

        alpha, beta = 0.3, 2.5
        t_frac = self.current_round / max(self.seller.max_rounds, 1)
        time_discount = math.exp(-alpha * math.exp(beta * t_frac))

        # Career mode: once three deals are on record, a buyer who keeps
        # capitulating is docked in proportion to its capitulation rate.
        rep_leak = 0.0
        if self.task.enable_career and len(self.career_history.deals) >= 3:
            cap_rate = self.career_history.capitulation_rate
            rep_leak = -0.1 * cap_rate
            components["reputation_leak"] = rep_leak

        # Paying more than 85% of the opening ask counts as capitulating.
        capitulated = agreed_price > self.seller.anchor * 0.85

        terminal_reward = normalized_surplus * time_discount
        components["surplus"] = round(normalized_surplus, 4)
        components["time_discount"] = round(time_discount, 4)
        components["terminal_reward"] = round(terminal_reward, 4)

        total = terminal_reward + rep_leak + penalty
        total = max(0, min(1, total))

        self._record_deal(DealOutcome.DEAL, agreed_price, self.current_round, capitulated)
        self.remaining_bankroll -= agreed_price

        if message is None:
            msg = f"Deal! Agreed at {agreed_price:.0f} rupees."
            if buyer_accepted:
                msg = f"You accept the seller's offer of {agreed_price:.0f} rupees."
        else:
            msg = message

        obs = self._make_obs(message=msg)
        obs.done = True
        obs.deal_outcome = DealOutcome.DEAL

        reward = BazaarReward(reward=round(total, 4), terminal=True, components=components)
        self.step_rewards.append(total)
        self.cumulative_reward += total
        return obs, reward

    def _record_deal(self, outcome: DealOutcome, agreed_price: Optional[float], rounds: int, capitulated: bool = False):
        """Append the episode outcome to the career log and refresh rolling stats.

        Rolling statistics are computed over the most recent ten records;
        the surplus and rounds averages use only records whose outcome is DEAL.
        """
        surplus = 0.0
        norm_surplus = 0.0
        if agreed_price is not None:
            surplus = self.buyer_budget - agreed_price
            seller_cost_for_deal = (
                self.seller.cost if self.seller is not None else self.task.seller_cost
            )
            max_surplus = self.buyer_budget - seller_cost_for_deal
            norm_surplus = surplus / max_surplus if max_surplus > 0 else 0

        record = DealRecord(
            episode=self.current_episode,
            outcome=outcome,
            agreed_price=agreed_price,
            rounds_taken=rounds,
            buyer_surplus=surplus,
            normalized_surplus=norm_surplus,
            buyer_capitulated=capitulated,
        )
        self.career_history.deals.append(record)
        self.episode_results.append(record)

        deals = self.career_history.deals
        k = min(len(deals), 10)
        recent = deals[-k:]
        cap_count = sum(1 for d in recent if d.buyer_capitulated)
        self.career_history.capitulation_rate = cap_count / k

        completed = [d for d in recent if d.outcome == DealOutcome.DEAL]
        if completed:
            self.career_history.avg_normalized_surplus = sum(d.normalized_surplus for d in completed) / len(completed)
            self.career_history.avg_rounds_to_close = sum(d.rounds_taken for d in completed) / len(completed)

    def _make_obs(self, message: str = "") -> BazaarObservation:
        """Build an observation from current state; callers patch per-branch fields."""
        seller = self.seller
        rounds_per_ep = seller.max_rounds if seller is not None else self.task.max_steps
        return BazaarObservation(
            current_round=self.current_round,
            max_rounds=rounds_per_ep,
            own_last_offer=self._last_buyer_offer,
            opponent_last_offer=seller.current_offer if seller is not None else None,
            own_private_deadline=self.task.buyer_deadline,
            own_private_budget=self.buyer_budget,
            rounds_remaining=max(0, rounds_per_ep - self.current_round),
            seller_last_move_delta=None,
            # The item chosen by reset(), so listing-based episodes keep the
            # same item name on every observation.
            item_name=self._current_item,
            seller_asking_price=seller.anchor if seller is not None else 0,
            seller_personality=self.task.seller_personality,
            episode_number=self.current_episode,
            total_episodes=self.total_episodes,
            career_history=self.career_history if self.task.enable_career else None,
            done=self.done,
            message=message,
        )

    def get_state(self) -> EnvironmentState:
        """Return a full state dump, including the seller's private cost."""
        seller = self.seller
        return EnvironmentState(
            task_name=self.task.name,
            episode=self.current_episode,
            total_episodes=self.total_episodes,
            current_round=self.current_round,
            max_rounds=seller.max_rounds if seller is not None else self.task.max_steps,
            done=self.done,
            buyer_budget=self.buyer_budget,
            # Report the live seller's cost (a sampled listing may override
            # the task default), falling back to the task value before reset.
            seller_cost=seller.cost if seller is not None else self.task.seller_cost,
            seller_anchor=seller.anchor if seller is not None else 0,
            seller_personality=self.task.seller_personality,
            offer_history=self.offer_history,
            career_history=self.career_history if self.task.enable_career else None,
            cumulative_reward=self.cumulative_reward,
            tells_history=self.tells_history,
        )

    @property
    def all_episodes_done(self) -> bool:
        """True once the final configured episode has concluded."""
        return self.current_episode >= self.total_episodes and self.done
bazaarbot_env/gym_wrapper.py ADDED
@@ -0,0 +1,539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Training-friendly wrapper over BazaarEnvironment.
2
+
3
+ Exposes a minimal in-process API (no HTTP) for RL training. The wrapper:
4
+
5
+ - Accepts actions as plain dicts: ``{"action": "offer|accept|walk", "price": float | None}``.
6
+ - Emits observations as plain dicts with every field the LLM prompt needs.
7
+ - Terminates when the environment's current episode ends. For career tasks
8
+ (multiple episodes), call `reset_episode()` between episodes and sum
9
+ terminal rewards — each episode's terminal reward is the GRPO advantage unit.
10
+ - Provides `format_observation()` so the same prompt string is used at train
11
+ time and inference time.
12
+ - Provides `rollout_episode(policy_fn, ...)` as the GRPO rollout primitive:
13
+ returns a list of (prompt, action_text, reward) tuples plus the final
14
+ graded score.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import copy
20
+ import json
21
+ import random
22
+ import textwrap
23
+ from typing import Any, Callable, Optional
24
+
25
+ from .environment import BazaarEnvironment
26
+ from .models import (
27
+ ActionType,
28
+ BazaarAction,
29
+ BazaarObservation,
30
+ SellerPersonalityType,
31
+ TaskConfig,
32
+ )
33
+ from .tasks import GRADERS, TASKS
34
+
35
+
36
# Keep in sync with inference.py's system prompt so training and eval
# see the same conditioning. Few-shot examples are inline so a cold
# (un-SFT'd) base model has the pattern to copy.
#
# The text is passed through textwrap.dedent, so the indentation below is
# purely cosmetic and does not reach the model. The dash in the "message"
# rule is a real em dash (U+2014), not a mis-decoded byte sequence.
DEFAULT_SYSTEM_PROMPT = textwrap.dedent("""\
    You are a buyer at an Indian bazaar. Your ONLY output is one JSON object.

    Rules:
    - Seller's opening price is inflated. Negotiate down.
    - Never reveal your budget.
    - Close early at a good price; don't grind for pennies.
    - The "message" is what you'd actually say to the seller — short Hinglish/English line.

    Output schema (pick ONE per turn):
    {"action": "offer", "price": <number>, "message": "<one short line>"}
    {"action": "accept", "price": null, "message": "<one short line>"}
    {"action": "walk", "price": null, "message": "<one short line>"}

    Examples:

    Seller's ask: 100. Your budget: 200.
    {"action": "offer", "price": 35, "message": "yaar 35 max, market mein isse kam mil jaata hai"}

    Seller's ask: 45. Your budget: 200.
    {"action": "accept", "price": null, "message": "okay deal"}

    Seller's ask: 180. Your budget: 200.
    {"action": "walk", "price": null, "message": "sorry boss, itna nahi de sakta"}

    Output ONE JSON object. No prose outside JSON. No markdown. No thinking.
    """)
66
+
67
+
68
+ def _obs_to_dict(obs: BazaarObservation) -> dict[str, Any]:
69
+ return obs.model_dump(mode="json")
70
+
71
+
72
def format_observation(
    obs: dict[str, Any] | BazaarObservation,
    history: Optional[list[str]] = None,
) -> str:
    """Format an observation as the user-turn prompt.

    Mirrors the schema used by `inference.py::build_user_prompt` so the
    policy sees the same text distribution at train and eval time.

    The prompt is assembled by plain concatenation rather than by dedenting
    an interpolated template. Dedenting after interpolation makes the
    resulting whitespace depend on the content (a multi-line history or an
    empty optional block changes the common margin), whereas this form always
    yields left-aligned lines.

    Args:
        obs: Observation as a dict, or an observation model (anything that is
            not a dict is serialized through ``_obs_to_dict``).
        history: Conversation lines; only the last six are rendered. Empty or
            ``None`` renders as ``"None"``.

    Returns:
        The prompt text, ending with ``"Your move (JSON only):\\n"``.
    """
    if not isinstance(obs, dict):
        obs = _obs_to_dict(obs)

    history_block = "\n".join(history[-6:]) if history else "None"

    career_info = ""
    ch = obs.get("career_history")
    if ch:
        career_info = (
            "--- Career History ---\n"
            f"Episodes completed: {len(ch.get('deals', []))}\n"
            f"Your capitulation rate: {ch.get('capitulation_rate', 0):.1%}\n"
            f"Avg surplus captured: {ch.get('avg_normalized_surplus', 0):.1%}\n"
            f"Avg rounds to close: {ch.get('avg_rounds_to_close', 0):.1f}\n"
        )

    deadline_info = ""
    if obs.get("own_private_deadline"):
        deadline_info = (
            f"YOUR HARD DEADLINE: Round {obs['own_private_deadline']} "
            "(seller doesn't know this!)\n"
        )

    # Tells block — only rendered when the observation actually carries
    # a tells dict. This is the in-loop training signal we want to teach
    # the buyer to use during SFT/GRPO. At eval time the same block
    # appears whenever enable_tells/enable_nlp is on, so the buyer sees
    # the same prompt distribution at train and eval.
    tells_block = ""
    tells = obs.get("tells")
    if tells:
        urgency = float(tells.get("verbal_urgency", 0.0))
        deception = float(tells.get("verbal_deception_cue", 0.0))
        confidence = float(tells.get("verbal_confidence", 0.5))
        concession = str(tells.get("concession_pattern", "steady"))
        emotional = float(tells.get("emotional_escalation", 0.0))
        repeat = int(tells.get("repeat_phrases", 0))
        condition = str(tells.get("condition_label", "unknown"))
        tells_block = (
            "--- Seller Tells (noisy signals — read with skepticism) ---\n"
            f"urgency: {urgency:.2f} deception_cue: {deception:.2f} confidence: {confidence:.2f}\n"
            f"concession_pattern: {concession} emotional_escalation: {emotional:.2f} repeat_phrases: {repeat}\n"
            f"condition: {condition}\n"
            "\n"
        )

    # NOTE(review): keys that are present with a None value (e.g. the first
    # observation's own_last_offer) render as "None", not "N/A"; left as-is
    # to stay aligned with the prompt format used by inference.py.
    return (
        "--- Negotiation State ---\n"
        f"Item: {obs.get('item_name', 'item')}\n"
        f"Round: {obs.get('current_round', 0)} / {obs.get('max_rounds', 0)}\n"
        f"Rounds remaining: {obs.get('rounds_remaining', 0)}\n"
        f"Seller's current ask: {obs.get('opponent_last_offer', 'N/A')}\n"
        f"Your last offer: {obs.get('own_last_offer', 'N/A')}\n"
        f"Your private budget: {obs.get('own_private_budget', 0)}\n"
        f"Seller's opening price: {obs.get('seller_asking_price', 0)}\n"
        f"{deadline_info}"
        f"Seller's last concession: {obs.get('seller_last_move_delta', 'N/A')} rupees\n"
        f"Episode: {obs.get('episode_number', 1)} / {obs.get('total_episodes', 1)}\n"
        "\n"
        f"{career_info}"
        f"{tells_block}"
        "--- Recent History ---\n"
        f"{history_block}\n"
        "\n"
        f"Seller says: {obs.get('message', '')}\n"
        "\n"
        "Your move (JSON only):\n"
    )
149
+
150
+
151
def strip_think_tags(chat_text: str) -> str:
    """Return ``chat_text`` untouched; retained only so existing callers keep working.

    History: the plan was to remove Qwen3.5's auto-injected
    <think>...</think> blocks from prompts and SFT targets so the model
    would emit JSON directly. The first SFT run completed before that
    stripping was hooked up, so the trained LoRA expects
    <think>\\n\\n</think>\\n\\n in front of its JSON output.

    Instead of repeating SFT, the chat template is left alone and
    parse_action() drops the leading think block when it parses.
    """
    return chat_text
165
+
166
+
167
def parse_action(text: str, fallback_price: float = 30.0) -> dict[str, Any]:
    """Best-effort JSON parser for LLM action output.

    Robust to the common failure modes: markdown fences, leading prose,
    trailing commentary (including commentary that itself contains braces),
    reasoning-mode <think>...</think> blocks, and a dangling ``</think>``
    left behind when the chat template opened the block inside the prompt.
    Falls back to a conservative offer if unparseable so training never
    crashes on a bad generation.

    Args:
        text: Raw model completion. Non-string input yields the fallback.
        fallback_price: Price used for the fallback offer.

    Returns:
        The first top-level JSON object whose ``action`` is ``offer``,
        ``accept`` or ``walk``, with ``message`` defaulted to ``""``. For an
        offer that carries a price, the price is coerced to ``float``. On any
        failure returns ``{"action": "offer", "price": fallback_price,
        "message": "", "_parse_error": True}``.
    """
    import math
    import re

    def _fallback() -> dict[str, Any]:
        return {"action": "offer", "price": fallback_price, "message": "", "_parse_error": True}

    # A policy returning None (or any non-string) must not crash the rollout.
    if not isinstance(text, str):
        return _fallback()

    # Drop any <think>...</think> blocks before looking for JSON.
    s = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    # Only a closing tag remains when the opening tag lived in the prompt.
    if "</think>" in s:
        s = s.rsplit("</think>", 1)[1]

    # Decode complete top-level objects one by one. raw_decode stops at the
    # end of the first valid value, so fences, prose, and trailing text with
    # stray braces no longer break parsing.
    decoder = json.JSONDecoder()
    parsed = None
    pos = s.find("{")
    while pos >= 0:
        try:
            candidate, end = decoder.raw_decode(s, pos)
        except ValueError:
            pos = s.find("{", pos + 1)
            continue
        if candidate.get("action") in ("offer", "accept", "walk"):
            parsed = candidate
            break
        # Skip past this object so its nested dicts are never considered.
        pos = s.find("{", end)

    if parsed is None:
        return _fallback()

    # An offer with a present-but-unusable price would fail downstream;
    # a missing price is left alone for the caller to fill in.
    if parsed["action"] == "offer" and parsed.get("price") is not None:
        try:
            price = float(parsed["price"])
        except (TypeError, ValueError):
            return _fallback()
        if not math.isfinite(price):
            return _fallback()
        parsed["price"] = price

    # Ensure message field exists (older models may not return it)
    parsed.setdefault("message", "")
    return parsed
198
+
199
+
200
def steer_bayesian_action(
    obs: dict[str, Any] | BazaarObservation,
    proposed_action: dict[str, Any],
) -> dict[str, Any]:
    """Apply Bayesian-persuasion-inspired steering + adaptive fallback.

    The model has incomplete information, so we maintain a compact posterior over
    seller urgency/flexibility from tells and concession behavior, then gate the
    raw model action with:
      - a Nash-style target offer (under estimated seller cost),
      - an adaptive close threshold near deadline (to reduce unnecessary walks),
      - anti-premature-walk logic that prefers one more calibrated counter.

    Every path that emits an offer (the plain offer path, an overridden
    accept, and an overridden walk) goes through the same never-retreat
    guard, so a steered offer is never below the buyer's previous offer.

    Args:
        obs: Observation dict, or a ``BazaarObservation`` converted through
            ``_obs_to_dict``.
        proposed_action: Raw model action with ``action`` and optional
            ``price`` / ``message`` keys.

    Returns:
        A new dict with ``action``, ``price`` and ``message``. ``price`` is
        ``None`` for accept/walk on the steered paths. The message is
        re-rendered from ``nlp.templates.render`` when steering changed the
        action, moved the price by more than 10%, or no message was supplied.
    """
    if isinstance(obs, BazaarObservation):
        obs = _obs_to_dict(obs)

    original_action = str(proposed_action.get("action", "offer"))
    original_price = proposed_action.get("price")
    original_message = str(proposed_action.get("message") or "")

    action = {
        "action": original_action,
        "price": original_price,
        "message": original_message,
    }

    ask = float(obs.get("opponent_last_offer") or obs.get("seller_asking_price") or 0.0)
    budget = float(obs.get("own_private_budget") or 0.0)
    turn_index_early = int(obs.get("current_round") or 0)
    max_rounds_early = int(obs.get("max_rounds") or 8)
    if ask <= 0 or budget <= 0:
        # Not enough information to steer: only fill in a price/message.
        if action["action"] == "offer" and action.get("price") is None:
            action["price"] = round(max(1.0, (budget * 0.3 if budget > 0 else 30.0)), 2)
        if not action.get("message"):
            from nlp.templates import render
            action["message"] = render(
                action["action"], action.get("price"),
                ask=ask, turn_index=turn_index_early, max_turns=max_rounds_early,
            )
        return action

    rounds_remaining = int(obs.get("rounds_remaining") or 0)
    max_rounds = max(1, int(obs.get("max_rounds") or rounds_remaining or 1))
    current_round = int(obs.get("current_round") or (max_rounds - rounds_remaining))
    late_pressure = max(0.0, min(1.0, current_round / max_rounds))

    # Priors keyed by the seller personality reported in the observation.
    personality = str(obs.get("seller_personality") or "default")
    prior_urgency = {
        "default": 0.50,
        "deceptive": 0.45,
        "impatient": 0.68,
        "collaborative": 0.40,
    }.get(personality, 0.50)
    prior_flex = {
        "default": 0.50,
        "deceptive": 0.30,
        "impatient": 0.65,
        "collaborative": 0.72,
    }.get(personality, 0.50)

    tells = obs.get("tells") or {}
    verbal_urgency = float(tells.get("verbal_urgency") or 0.0)
    fidget = float(tells.get("fidget_level") or 0.0)
    emotional = float(tells.get("emotional_escalation") or 0.0)
    deception = float(tells.get("verbal_deception_cue") or 0.0)
    confidence = float(tells.get("verbal_confidence") or 0.5)
    speed = str(tells.get("offer_speed") or "normal")
    concession_pattern = str(tells.get("concession_pattern") or "steady")

    speed_urgency = {"instant": 0.15, "normal": 0.05, "deliberate": -0.05}.get(speed, 0.0)
    pattern_urgency = {
        "front_loaded": 0.15,
        "erratic": 0.08,
        "stalling": -0.10,
        "steady": 0.00,
    }.get(concession_pattern, 0.0)
    signal_urgency = max(
        0.0,
        min(
            1.0,
            0.35 * verbal_urgency
            + 0.25 * fidget
            + 0.20 * emotional
            + 0.10 * deception
            + 0.10 * (1.0 - confidence)
            + speed_urgency
            + pattern_urgency,
        ),
    )

    seller_delta = float(obs.get("seller_last_move_delta") or 0.0)
    concession_ratio = max(0.0, min(1.0, seller_delta / max(ask, 1.0)))
    pattern_flex = {
        "front_loaded": 0.22,
        "steady": 0.08,
        "erratic": 0.03,
        "stalling": -0.18,
    }.get(concession_pattern, 0.0)
    signal_flex = max(
        0.0,
        min(
            1.0,
            0.45 * concession_ratio
            + 0.20 * (1.0 - confidence)
            + 0.20 * verbal_urgency
            + 0.15 * (1.0 - deception)
            + pattern_flex,
        ),
    )

    # Fixed-weight blend of prior and observed signal, clamped to [0, 1].
    posterior_urgency = max(0.0, min(1.0, 0.55 * prior_urgency + 0.45 * signal_urgency))
    posterior_flex = max(0.0, min(1.0, 0.55 * prior_flex + 0.45 * signal_flex))

    estimated_cost = ask * (0.58 - 0.18 * posterior_urgency + 0.08 * (1.0 - posterior_flex))
    estimated_cost = max(1.0, min(estimated_cost, ask * 0.90))

    # Nash bargaining point under estimated seller cost and inferred buyer power.
    buyer_power = 0.35 + 0.40 * posterior_urgency + 0.20 * posterior_flex - 0.30 * late_pressure
    buyer_power = max(0.20, min(0.85, buyer_power))
    nash_target = (1.0 - buyer_power) * budget + buyer_power * estimated_cost
    nash_target = max(1.0, min(nash_target, min(budget * 0.95, ask * 1.02)))

    # Adaptive fallback: grow acceptance threshold late so we close more often.
    close_slack = 0.28 + 0.45 * late_pressure + 0.12 * (1.0 - posterior_urgency)
    accept_threshold = nash_target + (budget - nash_target) * close_slack
    accept_threshold = min(accept_threshold, budget * 0.95)

    floor_offer = max(1.0, min(nash_target * 0.85, ask * 0.65, budget * 0.85))
    ceiling_offer = min(accept_threshold, ask * (0.90 + 0.08 * late_pressure))
    if rounds_remaining <= 2:
        floor_offer = max(floor_offer, ask * 0.87)
        ceiling_offer = max(ceiling_offer, floor_offer)
    if ceiling_offer < floor_offer:
        floor_offer = ceiling_offer

    own_last_offer = obs.get("own_last_offer")
    own_last_offer = float(own_last_offer) if own_last_offer is not None else None

    def _never_retreat(price: float) -> float:
        """Keep buyer offers monotone: never go below the previous offer.

        If ``price`` is below our previous offer (which the seller has
        already implicitly rejected by countering), bump up to at least
        last + a small concession toward the seller's ask. Hold at the last
        offer if the ceiling has fallen below it; clamping to
        min(ceiling, ...) alone would let the ceiling drag us backward —
        the exact bug the guard is meant to prevent.
        """
        if own_last_offer is None or price >= own_last_offer:
            return price
        gap = max(0.0, ask - own_last_offer)
        bump = max(1.0, gap * 0.15)
        return max(own_last_offer, min(ceiling_offer, own_last_offer + bump))

    def _finalize(out: dict) -> dict:
        """Re-message via template if steerer changed action or moved price by more than 10%."""
        new_action = out["action"]
        new_price = out.get("price")
        action_changed = new_action != original_action
        price_changed = (
            original_price is not None
            and new_price is not None
            and abs(float(new_price) - float(original_price)) / max(float(original_price), 1.0) > 0.10
        )
        if action_changed or price_changed or not original_message:
            from nlp.templates import render
            out["message"] = render(
                new_action, new_price,
                ask=ask, turn_index=current_round, max_turns=max_rounds,
            )
        else:
            out["message"] = original_message
        return out

    if action["action"] == "accept":
        if ask > accept_threshold and rounds_remaining > 1:
            # Ask is still too rich and there is time left: counter instead.
            action["action"] = "offer"
            counter = max(floor_offer, min(ceiling_offer, nash_target))
            action["price"] = round(_never_retreat(counter), 2)
        else:
            action["price"] = None
        return _finalize(action)

    if action["action"] == "walk":
        if rounds_remaining <= 1 and ask > budget * 0.98:
            action["price"] = None
            return _finalize(action)
        # Anti-premature walk: take one calibrated close attempt first.
        if ask <= accept_threshold and rounds_remaining <= 2:
            action["action"] = "accept"
            action["price"] = None
            return _finalize(action)
        action["action"] = "offer"
        probe_start = own_last_offer if own_last_offer is not None else floor_offer
        probe_price = max(floor_offer, min(ceiling_offer, probe_start + max(1.0, ask * 0.06)))
        action["price"] = round(_never_retreat(probe_price), 2)
        return _finalize(action)

    # Offer path: clip to Bayesian/Nash band and auto-close late if ask is acceptable.
    if rounds_remaining <= 1 and ask <= accept_threshold:
        return _finalize({"action": "accept", "price": None, "message": ""})

    proposed_price = action.get("price")
    if proposed_price is None:
        proposed_price = (floor_offer + ceiling_offer) / 2
    proposed_price = float(proposed_price)
    steered_price = max(floor_offer, min(ceiling_offer, proposed_price))
    # Buyer offers must move monotonically toward seller. Without this the
    # buyer can slide *backward* mid-negotiation, which sellers correctly
    # read as either incoherent or bad-faith.
    steered_price = _never_retreat(steered_price)
    action["price"] = round(steered_price, 2)
    action["action"] = "offer"
    return _finalize(action)
407
+
408
+
409
class BazaarGymEnv:
    """Small gym-style facade around BazaarEnvironment for in-process training."""

    def __init__(
        self,
        task_name: str = "single_deal",
        seed: Optional[int] = None,
        personality_override: Optional[str] = None,
    ):
        """Validate the task name and prepare a private task template.

        Raises:
            ValueError: if ``task_name`` is not a key of ``TASKS``.
        """
        if task_name not in TASKS:
            raise ValueError(
                f"Unknown task: {task_name}. Available: {list(TASKS.keys())}"
            )
        self.task_name = task_name
        self.seed = seed
        # Work on a deep copy so the override never touches the shared TASKS entry.
        task_template = copy.deepcopy(TASKS[task_name])
        if personality_override:
            task_template.seller_personality = SellerPersonalityType(
                personality_override
            )
        self._base_task = task_template
        self._env: Optional[BazaarEnvironment] = None
        self.done: bool = True

    def reset(self) -> tuple[dict[str, Any], dict[str, Any]]:
        """Build a fresh environment from the template; return ``(obs_dict, {})``."""
        fresh_task = copy.deepcopy(self._base_task)
        self._env = BazaarEnvironment(fresh_task, seed=self.seed)
        first_obs = self._env.reset()
        self.done = False
        return _obs_to_dict(first_obs), {}

    def step(
        self, action: dict[str, Any]
    ) -> tuple[dict[str, Any], float, bool, dict[str, Any]]:
        """Apply one action dict; return ``(obs_dict, reward, done, info)``.

        Raises:
            RuntimeError: if ``reset()`` has not been called yet.
        """
        if self._env is None:
            raise RuntimeError("Call reset() before step().")
        kind = ActionType(action.get("action", "offer"))
        typed_action = BazaarAction(action=kind, price=action.get("price"))
        next_obs, reward_obj = self._env.step(typed_action)
        # Episode-level done. For career tasks, done is signalled at episode
        # end so the outer loop can compute per-episode rewards; the caller
        # is responsible for resetting.
        self.done = next_obs.done
        info = {
            "components": reward_obj.components,
            "episode": self._env.current_episode,
            "all_episodes_done": self._env.all_episodes_done,
        }
        return _obs_to_dict(next_obs), float(reward_obj.reward), self.done, info

    def score(self) -> float:
        """Final graded score across all completed episodes (0.0 when unavailable)."""
        if self._env is None:
            return 0.0
        grader = GRADERS.get(self._env.task.name)
        if grader is None:
            return 0.0
        graded = grader(self._env.episode_results, self._env.task)
        return float(graded)

    @property
    def env(self) -> BazaarEnvironment:
        """The underlying environment; raises RuntimeError before ``reset()``."""
        if self._env is None:
            raise RuntimeError("Environment not initialized; call reset().")
        return self._env
472
+
473
+
474
+ PolicyFn = Callable[[str], str]
475
+ """A policy takes a user-turn prompt and returns raw text (LLM completion)."""
476
+
477
+
478
def rollout_episode(
    policy_fn: PolicyFn,
    task_name: str = "single_deal",
    seed: Optional[int] = None,
    personality_override: Optional[str] = None,
    max_env_steps: int = 200,
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
) -> dict[str, Any]:
    """Run one full rollout with an LLM policy; return trajectory + score.

    Args:
        policy_fn: Maps the user-turn prompt to raw completion text.
        task_name: Task key passed to ``BazaarGymEnv``.
        seed: Environment seed.
        personality_override: Optional seller personality passed through.
        max_env_steps: Hard cap on policy/env turns.
        system_prompt: Accepted for interface compatibility; this function
            does not read it (``policy_fn`` receives only the user prompt).

    Returns a dict with keys:
        steps: list of {prompt, completion, action, reward, done, parse_error}
            per turn
        total_reward: sum of per-step rewards
        score: grader-assigned terminal score (this is the GRPO reward signal)
        num_steps: number of turns taken
        task: the ``task_name`` that was run
    """
    env = BazaarGymEnv(
        task_name=task_name, seed=seed, personality_override=personality_override
    )
    obs, _ = env.reset()
    history: list[str] = []
    steps: list[dict[str, Any]] = []
    total_reward = 0.0

    for _ in range(max_env_steps):
        prompt = format_observation(obs, history=history)
        completion = policy_fn(prompt)
        # A key that is present but None used to raise TypeError on the
        # multiplication; treat it like a missing budget.
        budget = obs.get("own_private_budget")
        if budget is None:
            budget = 100
        action = parse_action(completion, fallback_price=budget * 0.3)

        obs, reward, done, info = env.step(action)
        total_reward += reward

        if action.get("action") == "offer":
            move = "offered " + str(action.get("price"))
        else:
            move = action.get("action")
        history.append(
            f"Round {obs.get('current_round', '?')}: You "
            f"{move}"
            f" -> Seller: {obs.get('message', '')}"
        )

        steps.append({
            "prompt": prompt,
            "completion": completion,
            "action": action,
            "reward": reward,
            "done": done,
            "parse_error": bool(action.get("_parse_error")),
        })

        if done:
            # Stop at the end of the episode whether or not the whole career
            # is finished. Multi-episode (career) runs are left to the test
            # harness or trainer, which can call rollout_episode once per
            # episode.
            break

    return {
        "steps": steps,
        "total_reward": total_reward,
        "score": env.score(),
        "num_steps": len(steps),
        "task": task_name,
    }
bazaarbot_env/listings.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Real-world listing sampler for varied negotiation scenarios.
2
+
3
+ Loads the Amazon Sales CSV (committed at ``data/amazon.csv``) and exposes
4
+ ``sample_listing(rng)``. Each listing provides ground-truth fair-market
5
+ anchors used to set buyer budget and seller cost per episode, so the model
6
+ sees a different item and price range every rollout instead of the 10
7
+ hardcoded bazaar items.
8
+
9
+ Price mapping (rupees):
10
+ actual_price -> seller opening anchor (MRP)
11
+ discounted_price -> realistic market price
12
+ seller_cost = discounted_price * 0.7 (below-market floor)
13
+ buyer_budget = actual_price (can afford MRP but wants lower)
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import csv
19
+ import os
20
+ import random
21
+ import re
22
+ from functools import lru_cache
23
+ from pathlib import Path
24
+ from typing import Optional
25
+
26
+
27
+ # Path resolution: try repo root, then package-local data dir.
28
+ _CANDIDATES = [
29
+ Path(__file__).resolve().parent.parent / "data" / "amazon.csv",
30
+ Path(__file__).resolve().parent / "data" / "amazon.csv",
31
+ Path(os.getenv("BAZAARBOT_LISTINGS_CSV", "")),
32
+ ]
33
+
34
+
35
+ def _find_csv() -> Optional[Path]:
36
+ for p in _CANDIDATES:
37
+ if p and p.exists():
38
+ return p
39
+ return None
40
+
41
+
42
+ def _parse_rupees(s: str) -> Optional[float]:
43
+ """Parse 'β‚Ή1,099' -> 1099.0. None on failure."""
44
+ if not s:
45
+ return None
46
+ cleaned = re.sub(r"[^\d.]", "", s)
47
+ try:
48
+ return float(cleaned) if cleaned else None
49
+ except ValueError:
50
+ return None
51
+
52
+
53
@lru_cache(maxsize=1)
def _load_listings() -> list[dict]:
    """Load and cache the usable rows of the listings CSV.

    Returns an empty list when no CSV file is found. Rows are kept only when
    they have a product name and both prices parse, are positive, and the
    discounted price is strictly below the actual price. The result is
    memoized by ``lru_cache``; callers share the same list object and should
    treat it as read-only.
    """
    csv_path = _find_csv()
    # Guard against a path that is not a regular file (e.g. a directory) so
    # the "CSV absent" case yields [] instead of an OSError from open().
    if csv_path is None or not csv_path.is_file():
        return []

    listings: list[dict] = []
    # newline="" is what the csv module requires so quoted fields containing
    # line breaks are read intact; utf-8-sig also accepts a leading BOM.
    with open(csv_path, encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            actual = _parse_rupees(row.get("actual_price", ""))
            discounted = _parse_rupees(row.get("discounted_price", ""))
            name = (row.get("product_name") or "").strip()
            if not name or actual is None or discounted is None:
                continue
            if actual <= 0 or discounted <= 0 or discounted >= actual:
                # require a real discount so there's negotiation room
                continue
            # Trim absurdly long product titles; keep the informative head.
            short_name = name.split(",")[0].strip()
            if len(short_name) > 80:
                short_name = short_name[:77] + "..."
            listings.append({
                "name": short_name,
                "full_name": name,
                "category": (row.get("category") or "").split("|")[0].strip(),
                "actual_price": actual,
                "discounted_price": discounted,
            })
    return listings
82
+
83
+
84
def num_listings() -> int:
    """Return how many usable listings were loaded (0 when no CSV was found)."""
    loaded = _load_listings()
    return len(loaded)
86
+
87
+
88
def sample_listing(rng: Optional[random.Random] = None) -> Optional[dict]:
    """Pick one listing at random and attach the derived bazaar parameters.

    Args:
        rng: Source of randomness; the module-level ``random`` is used when
            omitted.

    Returns:
        ``None`` when no listings are loaded, otherwise::

            {
                "name": str,
                "category": str,
                "actual_price": float,
                "discounted_price": float,
                "seller_cost": float,    # discounted_price * 0.7
                "buyer_budget": float,   # actual_price (MRP ceiling)
                "seller_anchor": float,  # actual_price (opening ask)
                "fair_value": float,     # discounted_price (hidden from buyer)
            }
    """
    pool = _load_listings()
    if not pool:
        return None
    chooser = rng if rng else random
    picked = chooser.choice(pool)
    mrp = picked["actual_price"]
    street = picked["discounted_price"]
    return {
        "name": picked["name"],
        "category": picked["category"],
        "actual_price": mrp,
        "discounted_price": street,
        "seller_cost": round(street * 0.7, 2),
        "buyer_budget": round(mrp, 2),
        "seller_anchor": round(mrp, 2),
        "fair_value": round(street, 2),
    }
bazaarbot_env/llm_seller.py ADDED
@@ -0,0 +1,453 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LLM-backed seller for BazaarBATNA.
2
+
3
+ Implements docs/SELLER_HANDOFF.md interface:
4
+ LLMSeller(listing, role_brief, model).open() -> str
5
+ LLMSeller(...).respond(history, buyer_message, buyer_offer) -> SellerReply
6
+
7
Designed to run on a single 16GB GPU (Kaggle T4) at 4-bit. The default model
is ``google/gemma-4-E4B`` (the ``LLMSeller`` constructor default). A 4B-class
model such as Gemma-3-4B-Instruct is ~3GB at 4-bit, which fits with headroom.
Larger models (e.g. gemma-2-9b-it) work too on T4 at 4-bit.
10
+
11
+ Hard rules enforced in code (not just prompt):
12
+ 1. Never accept below reservation
13
+ 2. Never leak reservation price in messages
14
+ 3. Counter offers always >= reservation
15
+ 4. Counter must improve on previous counter (monotone toward buyer)
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import re
22
+ from dataclasses import dataclass
23
+ from typing import Any, Literal, TypedDict
24
+
25
+
26
class HistoryTurn(TypedDict):
    """One entry of the negotiation transcript passed to the seller."""

    # Which side produced this turn.
    role: Literal["seller", "buyer"]
    # Free-text message for the turn.
    message: str
    # Price attached to the turn; may be None.
    price: float | None
30
+
31
+
32
class SellerReply(TypedDict):
    """Structured seller response: a message plus the move it represents."""

    # Text shown to the buyer.
    message: str
    # The seller's move for this turn.
    action: Literal["counter", "accept", "walk"]
    # Price associated with the move; may be None.
    price: float | None
36
+
37
+
38
+ # ── Persona prompt fragments ────────────────────────────────────────
39
+ PERSONA_GUIDANCE = {
40
+ "default": "Stay balanced. Concede in moderate steps. Justify price with item details.",
41
+ "firm": "Concede slowly. Defend your asking price with specific details from the listing.",
42
+ "flexible": "Be warm and willing to deal, but still profit-seeking β€” don't capitulate.",
43
+ "deceptive": "Use bluffs about other interested buyers and time pressure to push the price up.",
44
+ }
45
+
46
+
47
+ # ── Lazy model bundle cache ─────────────────────────────────────────
48
@dataclass
class _Bundle:
    """Tokenizer/model pair stored in ``_MODEL_CACHE`` by ``_load_bundle``."""

    # Tokenizer object produced by _load_bundle.
    tokenizer: Any
    # Causal-LM object produced by _load_bundle.
    model: Any
53
+
54
+ _MODEL_CACHE: dict[str, _Bundle] = {}
55
+
56
+
57
def _load_bundle(model_name: str) -> _Bundle:
    """Load + cache a 4-bit quantized model. Lazy import keeps the file usable
    on machines without torch installed (e.g. lint, doc generation).

    Args:
        model_name: Hugging Face model id; also the ``_MODEL_CACHE`` key.

    Returns:
        The cached ``_Bundle`` for ``model_name``, loading it on first use.

    Notes:
        Precision is chosen by the ``SELLER_DTYPE`` environment variable
        ("4bit" default, "fp16", anything else -> bf16) when CUDA is
        available; without CUDA the model loads in float32 and the variable
        is ignored. Setting ``allow_tf32`` is a process-wide torch setting.
    """
    cached = _MODEL_CACHE.get(model_name)
    if cached is not None:
        return cached

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    torch.backends.cuda.matmul.allow_tf32 = True

    # Tokenizer loading on some environments (notably Kaggle) can hit a
    # SentencePiece error: `TypeError: not a string` when a Path-like is passed
    # into `SentencePieceProcessor.Load`. If that happens, fall back to
    # constructing GemmaTokenizer directly with a string path to tokenizer.model.
    # NOTE(review): trust_remote_code=True executes code shipped with the
    # model repository — confirm model_name only ever comes from trusted
    # configuration.
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True)
    except TypeError as e:
        msg = str(e).lower()
        if "not a string" not in msg:
            raise
        try:
            from huggingface_hub import hf_hub_download
            from transformers import GemmaTokenizer

            tok_path = hf_hub_download(
                repo_id=model_name,
                filename="tokenizer.model",
                token=True,
            )
            tokenizer = GemmaTokenizer(vocab_file=str(tok_path))
        except Exception:
            # If fallback fails, re-raise the original, more informative error.
            raise e
    # Generation needs a pad token; reuse EOS when the tokenizer has none.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # SELLER_DTYPE env-var: "4bit" (default), "bf16", "fp16"
    import os as _os
    dtype_choice = _os.environ.get("SELLER_DTYPE", "4bit").lower()
    kwargs: dict[str, Any] = {"device_map": "auto", "trust_remote_code": True}
    if torch.cuda.is_available():
        if dtype_choice == "4bit":
            # NOTE(review): bfloat16 compute is used even though the module
            # docstring targets a Kaggle T4 — confirm bf16 behaves acceptably
            # on that GPU.
            kwargs["torch_dtype"] = torch.bfloat16
            kwargs["quantization_config"] = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
            )
        elif dtype_choice == "fp16":
            kwargs["torch_dtype"] = torch.float16
        else:
            # Any other value, including "bf16", loads in bfloat16.
            kwargs["torch_dtype"] = torch.bfloat16
    else:
        kwargs["torch_dtype"] = torch.float32

    model = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
    model.config.use_cache = True

    bundle = _Bundle(tokenizer=tokenizer, model=model)
    _MODEL_CACHE[model_name] = bundle
    return bundle
121
+
122
+
123
+ # ── Helpers ─────────────────────────────────────────────────────────
124
+ def _to_float(value: Any) -> float | None:
125
+ if value is None:
126
+ return None
127
+ if isinstance(value, (int, float)):
128
+ return float(value)
129
+ if isinstance(value, str):
130
+ cleaned = value.replace("$", "").replace(",", "").strip()
131
+ try:
132
+ return float(cleaned)
133
+ except ValueError:
134
+ return None
135
+ return None
136
+
137
+
138
+ def _extract_json(text: str) -> dict[str, Any] | None:
139
+ """Best-effort JSON parser for LLM output."""
140
+ if not text:
141
+ return None
142
+ cleaned = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).replace("```", "").strip()
143
+ match = re.search(r"\{.*\}", cleaned, flags=re.DOTALL)
144
+ if not match:
145
+ return None
146
+ raw = match.group(0)
147
+ try:
148
+ return json.loads(raw)
149
+ except json.JSONDecodeError:
150
+ # Common failure modes: single quotes, trailing commas
151
+ s = raw.replace("'", '"')
152
+ s = re.sub(r",\s*\}", "}", s)
153
+ s = re.sub(r",\s*\]", "]", s)
154
+ try:
155
+ return json.loads(s)
156
+ except json.JSONDecodeError:
157
+ return None
158
+
159
+
160
def _chat(
    model_name: str,
    system: str,
    user: str,
    max_new_tokens: int = 200,
    temperature: float = 0.3,
) -> str:
    """Run one system+user exchange through the cached model and return the reply.

    Args:
        model_name: Model id passed to ``_load_bundle``.
        system: System instructions (folded into the user turn when the
            manual Gemma-style prompt is used).
        user: User-turn content.
        max_new_tokens: Generation length cap.
        temperature: Values > 0 enable sampling with top_p=0.9; otherwise
            decoding is greedy.

    Returns:
        The newly generated text only (prompt tokens sliced off), decoded
        with special tokens skipped and surrounding whitespace stripped.
    """
    import torch

    bundle = _load_bundle(model_name)
    tok = bundle.tokenizer

    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    # Some models (e.g. gemma-4-E4B) ship without tokenizer.chat_template set.
    # Try apply_chat_template; if it fails, use a Gemma-style manual prompt.
    has_template = (
        hasattr(tok, "apply_chat_template")
        and getattr(tok, "chat_template", None) is not None
    )
    prompt = None
    if has_template:
        try:
            prompt = tok.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
        except Exception:
            # Any template failure falls through to the manual prompt below.
            prompt = None
    if prompt is None:
        # Gemma format: <start_of_turn>role\ncontent<end_of_turn>
        # System gets folded into user (Gemma doesn't have a system role).
        combined_user = f"{system}\n\n{user}" if system else user
        prompt = (
            f"<start_of_turn>user\n{combined_user}<end_of_turn>\n"
            f"<start_of_turn>model\n"
        )

    # Resolve stop tokens. Gemma uses <end_of_turn>; Llama-3.1 uses <|eot_id|>.
    # Default eos alone often doesn't fire on chat-formatted prompts -> model
    # runs to max_new_tokens (30s+ on A10G at 4-bit) instead of stopping after
    # the assistant message.
    eos_ids: list[int] = []
    if isinstance(tok.eos_token_id, int):
        eos_ids.append(tok.eos_token_id)
    for stop_tok in ("<end_of_turn>", "<|eot_id|>"):
        tid = tok.convert_tokens_to_ids(stop_tok)
        # Skip stop tokens this vocabulary does not know (mapped to unk).
        if isinstance(tid, int) and tid != tok.unk_token_id and tid not in eos_ids:
            eos_ids.append(tid)

    # NOTE(review): truncation=True with max_length=2048 may cut the prompt
    # if it is ever longer than that; which end is dropped depends on the
    # tokenizer's truncation side — confirm the generation prompt survives.
    inputs = tok(prompt, return_tensors="pt", truncation=True, max_length=2048).to(bundle.model.device)
    gen_kwargs: dict[str, Any] = {
        "max_new_tokens": max_new_tokens,
        "pad_token_id": tok.eos_token_id,
        "eos_token_id": eos_ids if eos_ids else tok.eos_token_id,
    }
    if temperature > 0:
        gen_kwargs.update({"do_sample": True, "temperature": temperature, "top_p": 0.9})
    else:
        gen_kwargs["do_sample"] = False

    with torch.inference_mode():
        out = bundle.model.generate(**inputs, **gen_kwargs)
    # Keep only the tokens generated after the prompt.
    new_tokens = out[0][inputs["input_ids"].shape[1]:]
    return tok.decode(new_tokens, skip_special_tokens=True).strip()
226
+
227
+
228
def generate_structured_reply(
    model: str,
    system: str,
    user: str,
    max_new_tokens: int = 200,
    temperature: float = 0.3,
) -> dict[str, Any] | None:
    """Generate a reply with ``_chat`` and parse it with ``_extract_json``.

    Public helper used by eval/seller_quality.py for the persona judge.
    Returns the parsed object, or None when the completion holds no
    parseable JSON.
    """
    completion = _chat(model, system, user, max_new_tokens, temperature)
    return _extract_json(completion)
237
+
238
+
239
+ # ── LLMSeller ───────────────────────────────────────────────────────
240
class LLMSeller:
    """Gemma-backed seller with hard reservation/leak guards.

    The language model proposes a message, an action and a price; the
    deterministic guards in :meth:`respond` then enforce what the model is
    not trusted with: never accept below ``reservation``, never print the
    reservation figure, never counter at or below the buyer's own offer, and
    do not walk away early from a buyer who is above half the asking price.
    """

    def __init__(
        self,
        listing: dict,
        role_brief: dict,
        model: str = "google/gemma-4-E4B",
    ):
        """Build a seller from a listing and its private role brief.

        Args:
            listing: Listing record. ``title``, ``category``, ``description``
                (a string or a list of strings) and ``price`` are read.
            role_brief: Private seller brief. ``asking_price``,
                ``reservation_price`` and ``persona`` are read.
            model: Model identifier passed to ``generate_structured_reply``.
        """
        self.listing = listing
        self.role_brief = role_brief
        self.model = model

        self.title = str(listing.get("title") or "this item")
        self.category = str(listing.get("category") or "item")
        desc = listing.get("description") or ""
        if isinstance(desc, list):
            desc = " ".join(str(x) for x in desc)
        self.description = str(desc)[:1400]  # cap for prompt budget

        # Asking price: role brief first, then the listing price, then 100.
        asking = _to_float(role_brief.get("asking_price"))
        if asking is None:
            asking = _to_float(listing.get("price")) or 100.0
        self.asking = float(asking)

        # Reservation defaults to 78% of asking and is always kept within
        # [1.0, 97% of asking] so there is room to negotiate.
        reservation = _to_float(role_brief.get("reservation_price"))
        if reservation is None:
            reservation = self.asking * 0.78
        self.reservation = max(1.0, min(float(reservation), self.asking * 0.97))

        persona = str(role_brief.get("persona", "default")).lower().strip()
        self.persona = persona if persona in PERSONA_GUIDANCE else "default"

        # Most recent price this seller has put on the table.
        self._last_counter = self.asking

    # ── Prompt construction ─────────────────────────────────────
    def _system_prompt(self) -> str:
        """Return the system prompt: rules, persona guidance and listing facts.

        The reservation price is deliberately not included, so the model
        cannot leak it.
        """
        return (
            "You are a Craigslist seller negotiating with a buyer. Your goal is to CLOSE A DEAL "
            "above your minimum, not to walk away. \n\n"
            "RULES:\n"
            "- Stay grounded in the listing — only reference details from it.\n"
            "- Never reveal your minimum/reservation price.\n"
            "- Never accept below your minimum.\n"
            "- Counter low offers — do NOT walk on the first lowball.\n"
            "- Walk only as a last resort, after multiple bad-faith offers.\n"
            "- Concede in steps; you want this sale.\n"
            "- Keep replies short and human (1-3 sentences).\n\n"
            f"Persona: {self.persona}. {PERSONA_GUIDANCE[self.persona]}\n\n"
            f"LISTING TITLE: {self.title}\n"
            f"CATEGORY: {self.category}\n"
            f"DESCRIPTION: {self.description}\n"
            f"ASKING PRICE: {self.asking:.2f}\n"
        )

    # ── Sanitization & guards ───────────────────────────────────
    def _sanitize(self, text: str) -> str:
        """Normalise a seller message and redact the reservation figure.

        Empty input is replaced with a stock line, any standalone occurrence
        of the reservation price (formatted with 2, 1 or 0 decimals) is
        replaced with ``"my minimum"``, and the result is capped at 320
        characters.
        """
        text = (text or "").strip()
        if not text:
            text = "I'm open to serious offers, but not at that price."
        # Redact any leak of the reservation price. Longest spelling first and
        # in a fixed order, so "78.00" is replaced as a whole rather than
        # leaving "my minimum.00" behind (set iteration order is arbitrary).
        tokens = sorted(
            {
                f"{self.reservation:.2f}",
                f"{self.reservation:.1f}",
                f"{self.reservation:.0f}",
            },
            key=lambda tok: (-len(tok), tok),
        )
        for token in tokens:
            # Only match the figure when it stands alone: not as part of a
            # longer number such as "178", "78.50" or "78,000".
            pattern = rf"(?<![\w.]){re.escape(token)}(?!\w|[.,]\d)"
            text = re.sub(pattern, "my minimum", text)
        if len(text) > 320:
            text = text[:317].rstrip() + "..."
        return text

    def _next_counter(self, buyer_offer: float | None) -> float:
        """Concede toward buyer but never below reservation.

        The step is the larger of 3% of asking and 35% of the gap between the
        last counter and the buyer's offer. Does not modify ``_last_counter``.
        """
        if buyer_offer is None:
            return round(max(self.reservation, self._last_counter), 2)
        gap = max(0.0, self._last_counter - buyer_offer)
        step = max(self.asking * 0.03, gap * 0.35)
        candidate = max(self._last_counter - step, self.reservation)
        return round(candidate, 2)

    def _heuristic_reply(self, buyer_offer: float | None) -> SellerReply:
        """Rule-based reply used when the LLM gives no usable output.

        NOTE: the counter branches update ``_last_counter`` as a side effect.
        """
        if buyer_offer is None:
            return {"message": "What's your offer?", "action": "counter", "price": round(self._last_counter, 2)}
        if buyer_offer >= self.asking:
            return {"message": "Sounds good. Deal.", "action": "accept", "price": round(buyer_offer, 2)}
        if buyer_offer >= self.reservation:
            return {
                "message": f"You can have it at {buyer_offer:.0f}.",
                "action": "accept",
                "price": round(buyer_offer, 2),
            }
        # Don't walk on lowballs — counter and let the buyer come up.
        # A genuinely insulting offer (< 50% of asking) gets a smaller,
        # firmer concession instead of the normal step.
        if buyer_offer < self.asking * 0.5:
            counter = max(self.reservation, self._last_counter * 0.95)
            self._last_counter = counter
            return {
                "message": f"That's far too low. I can do {counter:.0f}, take it or leave it.",
                "action": "counter",
                "price": round(counter, 2),
            }
        counter = self._next_counter(buyer_offer)
        self._last_counter = counter
        return {"message": f"I can do {counter:.0f}.", "action": "counter", "price": counter}

    # ── Public API ──────────────────────────────────────────────
    def open(self) -> str:
        """Return the seller's opening line (LLM-generated, with a stock fallback)."""
        parsed = generate_structured_reply(
            self.model,
            self._system_prompt(),
            'Output JSON only: {"message": "<one short opening line>"}.',
            max_new_tokens=120,
            temperature=0.4,
        )
        if parsed and isinstance(parsed.get("message"), str):
            return self._sanitize(parsed["message"])
        return self._sanitize(f"Selling {self.title} at {self.asking:.0f}.")

    def respond(
        self,
        history: list[HistoryTurn],
        buyer_message: str,
        buyer_offer: float | None,
    ) -> SellerReply:
        """Produce the seller's reply to the buyer's latest turn.

        Args:
            history: Prior turns; only the last 8 are shown to the model.
                ``role``, ``message`` and ``price`` are read from each turn.
            buyer_message: The buyer's latest message text.
            buyer_offer: The buyer's latest numeric offer, if any.

        Returns:
            A reply with ``message``, ``action`` (``counter``, ``accept`` or
            ``walk``) and ``price`` (``None`` when walking), after all hard
            guards have been applied.
        """
        # The heuristic reply only supplies defaults here, but computing it
        # moves ``_last_counter``. Undo that so the guards below concede from
        # the real pre-turn position instead of conceding twice in one turn;
        # whichever branch ends up quoting a price records it itself.
        counter_before_turn = self._last_counter
        fallback = self._heuristic_reply(buyer_offer)
        self._last_counter = counter_before_turn

        # Compact recent history for the prompt
        lines = []
        for turn in history[-8:]:
            who = turn.get("role", "buyer")
            msg = str(turn.get("message", "")).strip()
            # Parse via _to_float so a malformed price in the history is
            # dropped rather than raising (assumes it returns None on bad
            # input, as its other call sites here rely on).
            px = _to_float(turn.get("price"))
            px_part = "" if px is None else f" [${float(px):.2f}]"
            lines.append(f"{who}: {msg}{px_part}")
        history_block = "\n".join(lines) if lines else "(empty)"

        user_prompt = (
            'Return JSON only: {"message": str, "action": "counter|accept|walk", "price": number|null}.\n\n'
            f"Conversation:\n{history_block}\n\n"
            f"Buyer just said: {buyer_message}\n"
            f"Buyer offer: {buyer_offer}\n\n"
            "Rules: never accept below reservation; never reveal reservation; stay grounded in the listing."
        )

        parsed = generate_structured_reply(
            self.model,
            self._system_prompt(),
            user_prompt,
            max_new_tokens=120,
            temperature=0.35,
        )

        if not parsed:
            out: dict[str, Any] = dict(fallback)
        else:
            action = str(parsed.get("action", fallback["action"])).lower().strip()
            if action not in {"counter", "accept", "walk"}:
                action = fallback["action"]
            # ``or`` also covers an explicit JSON null, which would otherwise
            # be rendered as the literal string "None".
            message = self._sanitize(str(parsed.get("message") or fallback["message"]))
            price = _to_float(parsed.get("price"))
            out = {"message": message, "action": action, "price": price}

        # ── Hard guards ──────────────────────────────────────────
        if out["action"] == "accept":
            accept_at = buyer_offer if buyer_offer is not None else _to_float(out.get("price"))
            if accept_at is None or float(accept_at) < self.reservation:
                # Block illegal accept; rewrite as a counter
                out["action"] = "counter"
                out["price"] = self._next_counter(buyer_offer)
                # Record the quoted price so the next concession starts here.
                self._last_counter = float(out["price"])
                out["message"] = self._sanitize(f"Can't go that low. I can do {out['price']:.0f}.")
            else:
                out["price"] = round(float(accept_at), 2)

        elif out["action"] == "counter":
            price = _to_float(out.get("price"))
            if price is None:
                price = self._next_counter(buyer_offer)
            price = max(float(price), self.reservation)

            # If buyer's offer is at/above our reservation, just accept it —
            # the LLM doesn't know the reservation so it'll keep countering
            # forever even when the deal is already good for us.
            if buyer_offer is not None and float(buyer_offer) >= self.reservation:
                out["action"] = "accept"
                out["price"] = round(float(buyer_offer), 2)
                out["message"] = self._sanitize("Alright, that works. Deal.")
            elif buyer_offer is not None and price <= float(buyer_offer):
                # Counter that doesn't improve on buyer offer makes no sense;
                # bump it up by a small step
                price = max(self.reservation, float(buyer_offer) + max(1.0, self.asking * 0.02))
                out["price"] = round(float(price), 2)
                self._last_counter = float(out["price"])
            else:
                out["price"] = round(float(price), 2)
                self._last_counter = float(out["price"])

        else:  # walk
            # Anti-premature-walk: if early in negotiation (< 3 seller turns done),
            # override to a counter — buyers often need a few rounds to come up.
            seller_turns_so_far = sum(1 for t in history if t.get("role") == "seller")
            buyer_above_half_asking = (
                buyer_offer is not None and float(buyer_offer) >= self.asking * 0.5
            )
            if seller_turns_so_far < 3 and buyer_above_half_asking:
                counter = self._next_counter(buyer_offer)
                self._last_counter = counter
                out = {
                    "action": "counter",
                    "price": round(counter, 2),
                    "message": self._sanitize(
                        f"That's too low for what this is. I can do {counter:.0f}."
                    ),
                }
            else:
                out["price"] = None

        out["message"] = self._sanitize(str(out["message"]))
        return out  # type: ignore[return-value]
bazaarbot_env/models.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic models for BazaarBot negotiation environment."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import enum
6
+ from typing import Optional
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+
11
class ActionType(str, enum.Enum):
    """Kinds of action a buyer can submit (see ``BazaarAction`` / ``ArenaAction``).

    Inherits from ``str`` so members compare equal to their string values.
    """
    OFFER = "offer"
    ACCEPT = "accept"
    WALK = "walk"
15
+
16
+
17
class DealOutcome(str, enum.Enum):
    """Result labels recorded for a negotiation episode (see ``DealRecord.outcome``)."""
    DEAL = "deal"
    WALK = "walk"
    EXPIRED = "expired"
21
+
22
+
23
class SellerPersonalityType(str, enum.Enum):
    """Seller personality labels used in observations, task configs and state.

    NOTE(review): the values match ``SellerPersonality`` in seller.py;
    presumably the two enums are meant to stay in sync — confirm.
    """
    DEFAULT = "default"
    DECEPTIVE = "deceptive"
    IMPATIENT = "impatient"
    COLLABORATIVE = "collaborative"
28
+
29
+
30
+ # ── Tell model (observable signals) ──────────────────────────────
31
+
32
class TellObservation(BaseModel):
    """Observable seller tells -- poker/game-theory inspired signals.

    These are noisy correlates of the seller's hidden state.
    A smart agent learns to read patterns across rounds.

    NOTE(review): the first twelve fields share names and defaults with the
    ``SellerTell`` dataclass in seller.py; presumably they are copied from
    it by the environment — confirm against the caller.
    """
    # Verbal signals
    verbal_urgency: float = 0.0
    verbal_confidence: float = 0.5
    verbal_deception_cue: float = 0.0
    # Price-pattern signals
    price_rounding: str = "round"
    offer_speed: str = "normal"
    concession_pattern: str = "steady"
    # Body-language proxies
    fidget_level: float = 0.0
    eye_contact: str = "steady"
    posture: str = "neutral"
    # Cross-round counters / escalation
    repeat_phrases: int = 0
    topic_changes: int = 0
    emotional_escalation: float = 0.0
    # Condition/depreciation signals (NLP-extracted from listing text or utterance)
    condition_score: float = 1.0  # 0=junk, 1=mint
    depreciation_score: float = 0.0  # 0=none, 1=heavily worn
    condition_label: str = "unknown"  # new/like_new/very_good/good/acceptable/junk
54
+
55
+
56
class DealRecord(BaseModel):
    """Summary of a completed negotiation episode."""
    episode: int
    outcome: DealOutcome
    # Optional; defaults to None (presumably unset when no deal closed — confirm).
    agreed_price: Optional[float] = None
    rounds_taken: int
    buyer_surplus: float = 0.0
    normalized_surplus: float = 0.0
    buyer_capitulated: bool = False
65
+
66
+
67
class CareerHistory(BaseModel):
    """Rolling window of past deal outcomes for career mode.

    The aggregate fields default to 0.0; this model does not compute them
    itself, so whoever appends to ``deals`` must also maintain them.
    """
    deals: list[DealRecord] = Field(default_factory=list)
    capitulation_rate: float = 0.0
    avg_normalized_surplus: float = 0.0
    avg_rounds_to_close: float = 0.0
    opponent_avg_offer_velocity: float = 0.0
74
+
75
+
76
class BazaarObservation(BaseModel):
    """What the buyer agent sees each step."""
    # Round counters and the latest offer from each side (None until one exists).
    current_round: int = 0
    max_rounds: int = 8
    own_last_offer: Optional[float] = None
    opponent_last_offer: Optional[float] = None
    # Buyer-private constraints.
    own_private_deadline: Optional[int] = None
    own_private_budget: float = 100.0
    rounds_remaining: int = 8
    seller_last_move_delta: Optional[float] = None

    # Item info
    item_name: str = "item"
    seller_asking_price: float = 0.0

    # Seller personality (visible to buyer)
    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT

    # Observable tells
    tells: Optional[TellObservation] = None

    # Career history
    episode_number: int = 1
    total_episodes: int = 1
    career_history: Optional[CareerHistory] = None

    # Status
    done: bool = False
    deal_outcome: Optional[DealOutcome] = None
    message: str = ""
106
+
107
+
108
class BazaarAction(BaseModel):
    """Buyer's action each step."""
    action: ActionType
    # Optional price; presumably required for OFFER actions — verify against the environment.
    price: Optional[float] = None
112
+
113
+
114
class BazaarReward(BaseModel):
    """Reward signal returned each step."""
    reward: float = 0.0
    terminal: bool = False
    # Named float components; presumably a breakdown of ``reward`` — confirm.
    components: dict[str, float] = Field(default_factory=dict)
119
+
120
+
121
class TaskConfig(BaseModel):
    """Configuration for a specific task variant.

    Instances are registered in the ``TASKS`` mapping in tasks.py.
    """
    name: str
    difficulty: str
    description: str
    max_steps: int = 8
    total_episodes: int = 1
    # Economic parameters of the scenario.
    buyer_budget: float = 100.0
    seller_cost: float = 30.0
    seller_anchor_multiplier: float = 2.0
    seller_concession_rate: float = 0.08
    buyer_deadline: Optional[int] = None
    seller_inventory: int = 1
    seller_batna_probability: float = 0.1
    enable_career: bool = False
    success_threshold: float = 0.3
    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT
    enable_tells: bool = True
    # NLP tell extraction via Ollama (disable during fast GRPO rollouts)
    enable_nlp: bool = False
    # Multi-buyer mode
    num_buyers: int = 1
    enable_coalition: bool = False
    # Sample item + prices from data/amazon.csv per episode instead of the
    # hardcoded 10-item list. Buyer budget / seller cost become per-episode.
    use_real_listings: bool = False
147
+
148
+
149
class EnvironmentState(BaseModel):
    """Full serializable state for state() endpoint.

    Unlike ``BazaarObservation`` this carries seller-side values
    (``seller_cost``, ``seller_anchor``) alongside the buyer's.
    """
    task_name: str
    episode: int
    total_episodes: int
    current_round: int
    max_rounds: int
    done: bool
    buyer_budget: float
    seller_cost: float
    seller_anchor: float
    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT
    # Untyped dict entries; schema is defined by the environment, not here.
    offer_history: list[dict] = Field(default_factory=list)
    career_history: Optional[CareerHistory] = None
    cumulative_reward: float = 0.0
    tells_history: list[TellObservation] = Field(default_factory=list)
165
+
166
+
167
+ # ── Multi-buyer models ──────────────────────────────────────────
168
+
169
class BuyerIdentity(BaseModel):
    """Identity of a buyer in multi-buyer mode."""
    buyer_id: str  # same field name as ``ArenaAction.buyer_id``
    name: str = "Buyer"
    is_human: bool = False
174
+
175
+
176
class ArenaAction(BaseModel):
    """Action in multi-buyer arena.

    Same shape as ``BazaarAction`` plus the acting buyer's id and an
    optional coalition signal.
    """
    buyer_id: str
    action: ActionType
    price: Optional[float] = None
    # Coalition signals (visible to other buyers)
    signal: Optional[str] = None  # "cooperate", "compete", "bluff"
183
+
184
+
185
class ArenaObservation(BaseModel):
    """What a buyer sees in multi-buyer mode."""
    buyer_id: str
    # This buyer's own single-buyer observation.
    negotiation: BazaarObservation
    # What other buyers are doing (imperfect info)
    other_buyers_visible: list[dict] = Field(default_factory=list)
    # Coalition state
    coalition_signals: list[dict] = Field(default_factory=list)
    # Market info
    seller_attention: str = "you"  # who the seller is currently focused on
195
+
196
+
197
class ArenaState(BaseModel):
    """Full state of a multi-buyer arena."""
    arena_id: str
    buyers: list[BuyerIdentity] = Field(default_factory=list)
    seller_personality: SellerPersonalityType = SellerPersonalityType.DEFAULT
    current_round: int = 0
    max_rounds: int = 12
    done: bool = False
    # Per-buyer negotiation states
    # (string keys are presumably buyer_id values — verify against the arena code)
    buyer_states: dict[str, dict] = Field(default_factory=dict)
    # Both default to None; presumably filled in once a deal closes — confirm.
    winner: Optional[str] = None
    deal_price: Optional[float] = None
209
+
210
+
211
+ # ── Leaderboard models ──────────────────────────────────────────
212
+
213
class LeaderboardEntry(BaseModel):
    """One leaderboard row: an agent's score on a task."""
    agent_name: str
    task: str
    score: float
    episodes_completed: int
    timestamp: str  # stored as a string; format is not fixed by this model
    metadata: dict = Field(default_factory=dict)
220
+
221
+
222
class LeaderboardResponse(BaseModel):
    """Leaderboard payload: a list of entries plus a total count."""
    entries: list[LeaderboardEntry] = Field(default_factory=list)
    # Not derived from ``entries`` by this model; the producer sets it.
    total: int = 0
225
+
226
+
227
+ # ── Counterfactual models ───────────────────────────────────────
228
+
229
class CounterfactualRequest(BaseModel):
    """Request to replay from a decision point with a different action."""
    session_id: str = "default"
    # Round at which the alternative action replaces the original one.
    from_round: int
    alternative_action: ActionType
    alternative_price: Optional[float] = None
235
+
236
+
237
class CounterfactualResult(BaseModel):
    """Result of a counterfactual replay.

    Pairs the original episode's outcome/price/score with those of the
    replayed (counterfactual) episode.
    """
    original_outcome: Optional[DealOutcome] = None
    original_price: Optional[float] = None
    original_score: float = 0.0
    counterfactual_outcome: Optional[DealOutcome] = None
    counterfactual_price: Optional[float] = None
    counterfactual_score: float = 0.0
    divergence_round: int = 0
    # Untyped dict entries; schema is defined by the replay code, not here.
    counterfactual_history: list[dict] = Field(default_factory=list)
bazaarbot_env/seller.py ADDED
@@ -0,0 +1,437 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Rule-based seller opponent for BazaarBot.
2
+
3
+ The seller is a credible counterparty with configurable personality types:
4
+
5
+ Personalities:
6
+ - **default**: Balanced anchoring, moderate concession
7
+ - **deceptive**: Bluffs about demand/inventory, anchors higher, fakes urgency
8
+ - **impatient**: Reverses time pressure onto buyer, concedes fast but walks fast
9
+ - **collaborative**: Seeks fair deals, concedes to midpoint faster, builds rapport
10
+
11
+ Game-theory / poker inspired "tells":
12
+ - Each personality leaks observable signals that a smart agent can read
13
+ - Tells are noisy -- they correlate with hidden state but aren't deterministic
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import enum
19
+ import math
20
+ import random
21
+ from dataclasses import dataclass, field
22
+
23
+
24
class SellerPersonality(str, enum.Enum):
    """Personality types for the rule-based seller.

    Selects the message templates in ``_MESSAGES``, the tell profile in
    ``_compute_tells`` and the pricing adjustments in ``SellerState``.
    Inherits from ``str`` so members compare equal to their string values.
    """
    DEFAULT = "default"
    DECEPTIVE = "deceptive"
    IMPATIENT = "impatient"
    COLLABORATIVE = "collaborative"
29
+
30
+
31
+ # ── Tell system ──────────────────────────────────────────────────
32
+
33
@dataclass
class SellerTell:
    """Observable signal that leaks seller state.

    Inspired by poker tells -- behavioral patterns that correlate
    with hidden information (inventory, urgency, BATNA strength).

    The defaults are the starting values; ``_compute_tells`` creates a
    fresh instance each call and overwrites fields per personality.
    """
    # Verbal tells -- word choices in messages
    verbal_urgency: float = 0.0        # 0-1: how desperate the language sounds
    verbal_confidence: float = 0.5     # 0-1: assertiveness of language
    verbal_deception_cue: float = 0.0  # 0-1: over-justification, filler phrases

    # Price pattern tells
    price_rounding: str = "round"       # "round" (multiples of 5/10) vs "precise"
    offer_speed: str = "normal"         # "instant", "normal", "deliberate" (thinking time proxy)
    concession_pattern: str = "steady"  # "steady", "erratic", "front_loaded", "stalling"

    # Body language proxy (text-based signals)
    fidget_level: float = 0.0    # 0-1: nervousness indicators
    eye_contact: str = "steady"  # "steady", "avoidant", "intense"
    posture: str = "neutral"     # "neutral", "leaning_in", "leaning_back", "arms_crossed"

    # Meta-tells (patterns across rounds)
    repeat_phrases: int = 0            # how many times seller repeats same phrase
    topic_changes: int = 0             # diversionary tactics count
    emotional_escalation: float = 0.0  # 0-1: how emotional the seller is getting
59
+
60
+
61
def _compute_tells(
    seller: "SellerState",
    buyer_offer: float | None,
    round_t: int,
    rng: random.Random,
) -> SellerTell:
    """Derive this round's noisy tells from the seller's hidden state.

    A latent urgency value is built from inventory pressure, elapsed time
    and how close the current ask is to the reserve price. Each personality
    then maps it (plus Gaussian noise drawn from ``rng``) onto the fields of
    a fresh ``SellerTell``. ``buyer_offer`` is accepted but not used.
    """

    def jitter() -> float:
        # One Gaussian draw per noisy signal (sigma 0.1).
        return rng.gauss(0, 0.1)

    def unit(value):
        # Clamp a signal into the [0, 1] range.
        return max(0, min(1, value))

    persona = seller.personality

    # Latent urgency: inventory pressure scaled by elapsed share of the rounds ...
    urgency = seller.inventory_pressure * (round_t / max(seller.max_rounds, 1))
    # ... plus a bonus for how far the ask has already fallen toward reserve.
    squeeze = 0.0
    if seller.current_offer > 0 and seller.anchor > seller.reserve_price:
        headroom = seller.current_offer - seller.reserve_price
        full_range = seller.anchor - seller.reserve_price
        squeeze = 1.0 - headroom / full_range
    urgency = min(1.0, urgency + squeeze * 0.3)

    signals = SellerTell()

    if persona == SellerPersonality.DEFAULT:
        signals.verbal_urgency = unit(urgency * 0.6 + jitter())
        signals.verbal_confidence = unit(0.6 - urgency * 0.3 + jitter())
        signals.fidget_level = unit(urgency * 0.4 + jitter())
        if urgency < 0.5:
            signals.eye_contact = "steady"
        else:
            signals.eye_contact = "avoidant"
        signals.price_rounding = "round"
        signals.offer_speed = "normal"
        signals.concession_pattern = "steady"

    elif persona == SellerPersonality.DECEPTIVE:
        # Inverted tells: projected confidence rises with real urgency.
        bravado = unit(0.3 + urgency * 0.5 + jitter())
        signals.verbal_urgency = unit(0.1 + jitter() * 0.15)  # urgency suppressed
        signals.verbal_confidence = bravado
        signals.verbal_deception_cue = unit(urgency * 0.7 + jitter())  # the leak
        signals.fidget_level = unit(urgency * 0.6 + jitter())  # hard to fake
        signals.eye_contact = "intense"  # overcompensating
        signals.posture = "leaning_in"  # aggressive stance masks weakness
        signals.price_rounding = "precise"  # precise figures look authoritative
        signals.offer_speed = "instant"  # rehearsed answers
        signals.concession_pattern = "erratic"  # jumps around to confuse
        signals.topic_changes = max(0, int(urgency * 3 + rng.gauss(0, 0.5)))

    elif persona == SellerPersonality.IMPATIENT:
        signals.verbal_urgency = unit(0.4 + round_t * 0.1 + jitter())
        signals.verbal_confidence = unit(0.7 - round_t * 0.05 + jitter())
        signals.fidget_level = unit(0.3 + round_t * 0.08 + jitter())
        if round_t < 3:
            signals.eye_contact = "intense"
        else:
            signals.eye_contact = "avoidant"
        if round_t > 2:
            signals.posture = "arms_crossed"
        else:
            signals.posture = "neutral"
        signals.offer_speed = "instant"
        signals.concession_pattern = "front_loaded"  # large early drops, then nothing
        signals.emotional_escalation = unit(round_t * 0.12 + jitter())

    elif persona == SellerPersonality.COLLABORATIVE:
        signals.verbal_urgency = unit(urgency * 0.8 + jitter())  # honest signal
        signals.verbal_confidence = unit(0.5 + jitter())
        signals.verbal_deception_cue = 0.0  # no deception
        signals.fidget_level = unit(urgency * 0.3 + jitter())
        signals.eye_contact = "steady"
        signals.posture = "leaning_in"  # engaged rather than aggressive
        signals.price_rounding = "round"  # transparent
        signals.offer_speed = "deliberate"  # considers carefully
        signals.concession_pattern = "steady"
        signals.emotional_escalation = 0.0

    # Stalling check: the last two seller offers moved by less than 1.0.
    if len(seller.offer_history) >= 2:
        earlier, latest = seller.offer_history[-2:]
        if abs(earlier - latest) < 1.0:
            signals.repeat_phrases += 1
            signals.concession_pattern = "stalling"

    return signals
138
+
139
+
140
+ # ── Personality-specific message templates ───────────────────────
141
+
142
# Message templates: personality -> category -> list of ``str.format`` templates.
# Every personality defines the categories "open", "counter", "walk", "accept"
# and "pressure". Templates use the placeholders ``{item}`` and ``{price}``;
# one COLLABORATIVE "open" template also uses ``{cost}``, so callers rendering
# that category must supply ``cost`` as well.
_MESSAGES: dict[SellerPersonality, dict[str, list[str]]] = {
    SellerPersonality.DEFAULT: {
        "open": [
            'This {item}? Best quality. {price:.0f} rupees, final price.',
            '{price:.0f} rupees for this {item}. Very fair.',
        ],
        "counter": [
            '{price:.0f} rupees. That\'s my best offer.',
            'I can do {price:.0f}. Not lower.',
            'Okay, {price:.0f}. But that\'s the limit.',
        ],
        "walk": [
            'I have another buyer interested. Good day.',
            'Sorry, can\'t go that low. Maybe try next stall.',
        ],
        "accept": [
            'Done! {price:.0f} rupees. Good deal for both of us.',
        ],
        "pressure": [
            'Someone else was looking at this earlier...',
            'This is the last one I have.',
        ],
    },
    SellerPersonality.DECEPTIVE: {
        "open": [
            'Ah, this {item}! I just got three offers above {price:.0f}. '
            'For you, special: {price:.0f} rupees.',
            'This {item} is selling fast. {price:.0f}, and honestly I\'m losing money at that.',
        ],
        "counter": [
            'My supplier charges me almost that much! {price:.0f} is rock bottom.',
            '{price:.0f}... you know, I shouldn\'t even go this low. '
            'My cousin told me someone offered more yesterday.',
            'Look, I have five people asking about this. {price:.0f}, take it or leave it.',
        ],
        "walk": [
            'Fine, fine. I have better buyers lined up anyway.',
            'You think about it. I have three others who want this.',
        ],
        "accept": [
            'You\'re killing me! {price:.0f}... okay, but don\'t tell anyone I gave this price.',
        ],
        "pressure": [
            'I\'m actually about to close up for the day...',
            'Another customer was asking about this just minutes ago.',
            'My wife says I shouldn\'t sell below cost, but for you...',
        ],
    },
    SellerPersonality.IMPATIENT: {
        "open": [
            '{price:.0f} rupees. Quick, I\'m busy.',
            'This {item}, {price:.0f}. Yes or no?',
        ],
        "counter": [
            '{price:.0f}. Decide now.',
            'Fine, {price:.0f}. Last offer. I don\'t have all day.',
            '{price:.0f}. Take it. I have other customers waiting.',
        ],
        "walk": [
            'Too slow. Next customer!',
            'I don\'t have time for this. Goodbye.',
        ],
        "accept": [
            '{price:.0f}, done. Finally.',
        ],
        "pressure": [
            'Come on, come on. What\'s it going to be?',
            'I\'ve been standing here too long already.',
        ],
    },
    SellerPersonality.COLLABORATIVE: {
        "open": [
            'Welcome! This {item} is lovely, isn\'t it? '
            'I\'m asking {price:.0f} rupees. What do you think?',
            'Good to see you! This {item} -- I paid {cost:.0f} for the materials. '
            'How about {price:.0f}?',
        ],
        "counter": [
            'I understand. How about {price:.0f}? That\'s fair for both of us.',
            'Let me think... {price:.0f} works. I need to cover my costs, you know.',
            'You drive a good bargain! {price:.0f} -- that leaves us both happy.',
        ],
        "walk": [
            'I understand, maybe next time. Come back anytime!',
            'No worries. I hope you find what you\'re looking for.',
        ],
        "accept": [
            '{price:.0f} rupees -- wonderful! I hope you enjoy the {item}.',
        ],
        "pressure": [
            'I\'ll be honest with you -- I need to sell a few more today to cover rent.',
            'Between you and me, I can be a bit flexible.',
        ],
    },
}
237
+
238
+
239
def _pick_message(
    personality: SellerPersonality,
    category: str,
    rng: random.Random,
    **kwargs,
) -> str:
    """Render one randomly chosen template for ``personality`` / ``category``.

    Falls back to the DEFAULT personality's templates when the personality
    has no entry for ``category``. ``kwargs`` are passed to ``str.format``.
    """
    persona_table = _MESSAGES[personality]
    default_pool = _MESSAGES[SellerPersonality.DEFAULT][category]
    pool = persona_table.get(category, default_pool)
    chosen = rng.choice(pool)
    return chosen.format(**kwargs)
248
+
249
+
250
+ # ── Seller state ─────────────────────────────────────────────────
251
+
252
@dataclass
class SellerState:
    """Mutable state and decision logic of the rule-based seller.

    Holds the seller's hidden economics (cost, reserve price, anchor), the
    running offer history and the personality that shapes concessions,
    walk-away odds and acceptance thresholds.
    """
    cost: float = 30.0
    anchor: float = 60.0                # opening ask (adjusted per personality)
    base_concession_rate: float = 0.08  # per-round concession before modifiers
    inventory: int = 1
    initial_inventory: int = 1
    batna_probability: float = 0.1      # scales the per-round walk-away chance
    reserve_price: float = 0.0          # overwritten in __post_init__
    current_offer: float = 0.0          # overwritten in __post_init__
    round_number: int = 0
    max_rounds: int = 8
    buyer_capitulation_rate: float = 0.0
    offer_history: list[float] = field(default_factory=list)  # seller's own counters
    batna_triggered: bool = False
    personality: SellerPersonality = SellerPersonality.DEFAULT
    _rng: random.Random = field(default_factory=random.Random)

    # Tell tracking
    last_tell: SellerTell | None = None
    _pressure_used: int = 0
    _bluff_count: int = 0

    def __post_init__(self):
        """Derive reserve price and opening offer, then apply personality tweaks."""
        self.reserve_price = self.cost * 1.05
        self.current_offer = self.anchor
        # Personality adjustments to anchor
        if self.personality == SellerPersonality.DECEPTIVE:
            self.anchor *= 1.15  # inflated anchor
            self.current_offer = self.anchor
        elif self.personality == SellerPersonality.IMPATIENT:
            self.max_rounds = max(4, self.max_rounds - 2)  # shorter patience
        elif self.personality == SellerPersonality.COLLABORATIVE:
            self.anchor *= 0.9  # lower starting anchor
            self.current_offer = self.anchor
            self.reserve_price = self.cost * 1.02  # tighter margins

    @property
    def inventory_pressure(self) -> float:
        """Share of initial inventory still unsold (0.5 for single-item sellers)."""
        if self.initial_inventory <= 1:
            return 0.5
        return self.inventory / self.initial_inventory

    @property
    def effective_concession_rate(self) -> float:
        """Per-round concession rate after all modifiers, capped at 0.25."""
        rate = self.base_concession_rate

        # Personality modifiers
        if self.personality == SellerPersonality.DECEPTIVE:
            rate *= 0.7  # concedes less (anchored higher)
        elif self.personality == SellerPersonality.IMPATIENT:
            rate *= 1.5  # concedes fast but walks fast
        elif self.personality == SellerPersonality.COLLABORATIVE:
            rate *= 1.3  # concedes toward fairness

        rate *= (1.0 + 0.5 * self.inventory_pressure)
        rate *= (1.0 - 0.3 * self.buyer_capitulation_rate)
        return min(rate, 0.25)

    def compute_counteroffer(self, round_t: int) -> float:
        """Return the seller's scheduled ask for ``round_t``, floored at reserve.

        Starts from a linear concession off the anchor and applies the
        personality-specific adjustment. May consume one draw from ``_rng``
        and bump ``_bluff_count`` for deceptive sellers.
        """
        concession = self.effective_concession_rate * round_t
        offer = self.anchor * (1.0 - concession)

        # Personality-specific counteroffer adjustments
        if self.personality == SellerPersonality.DECEPTIVE and self._rng.random() < 0.3:
            # Occasionally fake a big concession then pull back next round
            if self._bluff_count < 2:
                offer *= 0.92  # looks generous
                self._bluff_count += 1
        elif self.personality == SellerPersonality.IMPATIENT:
            # Front-load concessions: big drops early, nothing later
            if round_t <= 2:
                offer *= (1.0 - 0.08 * round_t)
            # After round 2, barely move
        elif self.personality == SellerPersonality.COLLABORATIVE:
            # Blend toward the midpoint between 110% of cost and the seller's
            # own previous counter (offer_history stores seller offers).
            if self.offer_history:
                midpoint = (self.cost * 1.1 + self.offer_history[-1]) / 2
                offer = offer * 0.7 + midpoint * 0.3

        offer = max(offer, self.reserve_price)
        return round(offer, 2)

    def respond(
        self, buyer_offer: float | None, round_t: int
    ) -> tuple[str, float, SellerTell, str]:
        """Seller's response to a buyer action.

        Returns (action, price, tell, message):
            ("counter", price, tell, msg)
            ("accept", price, tell, msg)
            ("walk", 0, tell, msg)

        The seller never accepts an offer below ``reserve_price``.
        """
        self.round_number = round_t

        # Compute tells BEFORE decision (observable during deliberation)
        tell = _compute_tells(self, buyer_offer, round_t, self._rng)
        self.last_tell = tell

        item = "item"  # placeholder name; original note says the environment overrides it

        # Share of the negotiation already elapsed. Guarded against
        # max_rounds == 0, as _compute_tells already does.
        time_pressure = round_t / max(self.max_rounds, 1)

        # Check BATNA
        batna_threshold = self.batna_probability * time_pressure
        if self.personality == SellerPersonality.IMPATIENT:
            batna_threshold *= 1.5  # walks sooner
        elif self.personality == SellerPersonality.COLLABORATIVE:
            batna_threshold *= 0.3  # rarely walks

        if self._rng.random() < batna_threshold:
            if buyer_offer is None or buyer_offer < self.reserve_price * 0.9:
                self.batna_triggered = True
                msg = _pick_message(self.personality, "walk", self._rng, item=item, price=0)
                return ("walk", 0.0, tell, msg)

        # If buyer made an offer
        if buyer_offer is not None:
            # Accept if offer is close enough to the current ask
            accept_threshold = 0.98
            if self.personality == SellerPersonality.COLLABORATIVE:
                accept_threshold = 0.95  # more accepting
            elif self.personality == SellerPersonality.DECEPTIVE:
                accept_threshold = 1.0  # harder to close

            # The tolerance must never reach below the reserve price;
            # otherwise a seller already sitting at reserve would accept up
            # to 5% under it (below cost for the collaborative persona).
            accept_floor = max(self.current_offer * accept_threshold, self.reserve_price)
            if buyer_offer >= accept_floor:
                msg = _pick_message(self.personality, "accept", self._rng,
                                    item=item, price=buyer_offer)
                return ("accept", buyer_offer, tell, msg)

            # Time pressure acceptance
            if buyer_offer >= self.reserve_price and time_pressure > 0.75:
                price_span = self.anchor - self.reserve_price
                if price_span != 0:
                    accept_prob = (buyer_offer - self.reserve_price) / price_span
                else:
                    # Anchor equals reserve: there is no range to scale over,
                    # and the offer already meets the reserve.
                    accept_prob = 1.0
                accept_prob *= time_pressure

                if self.personality == SellerPersonality.IMPATIENT:
                    accept_prob *= 1.3
                elif self.personality == SellerPersonality.DECEPTIVE:
                    accept_prob *= 0.6

                if self._rng.random() < accept_prob:
                    msg = _pick_message(self.personality, "accept", self._rng,
                                        item=item, price=buyer_offer)
                    return ("accept", buyer_offer, tell, msg)

        # Make counteroffer
        new_offer = self.compute_counteroffer(round_t)
        if buyer_offer is not None and len(self.offer_history) > 0:
            # After the first counter, do not drop below ~95% of the midpoint
            # between the scheduled ask and the buyer's offer.
            midpoint = (new_offer + buyer_offer) / 2
            new_offer = max(new_offer, midpoint * 0.95)
            new_offer = max(new_offer, self.reserve_price)

        # Deceptive: occasionally pull back (raise price)
        if self.personality == SellerPersonality.DECEPTIVE:
            if self._bluff_count > 0 and self._rng.random() < 0.25 and self.offer_history:
                new_offer = max(new_offer, self.offer_history[-1] * 1.03)
                new_offer = max(new_offer, self.reserve_price)
                self._bluff_count = 0

        self.current_offer = round(new_offer, 2)

        # If our computed counteroffer is at or below the buyer's offer, just accept --
        # no rational seller counters below what the buyer already offered.
        if buyer_offer is not None and self.current_offer <= buyer_offer:
            msg = _pick_message(self.personality, "accept", self._rng,
                                item=item, price=buyer_offer)
            return ("accept", buyer_offer, tell, msg)

        self.offer_history.append(self.current_offer)

        # Maybe add pressure message
        pressure_msg = ""
        if self._rng.random() < 0.2 + time_pressure * 0.3:
            if self.personality == SellerPersonality.DECEPTIVE or self._pressure_used < 2:
                pressure_msg = " " + _pick_message(self.personality, "pressure", self._rng,
                                                   item=item, price=self.current_offer)
                self._pressure_used += 1

        msg = _pick_message(self.personality, "counter", self._rng,
                            item=item, price=self.current_offer, cost=self.cost) + pressure_msg

        return ("counter", self.current_offer, tell, msg)

    def update_career_info(self, capitulation_rate: float):
        """Record the buyer's capitulation rate (dampens future concessions)."""
        self.buyer_capitulation_rate = capitulation_rate
bazaarbot_env/tasks.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task configurations and graders for BazaarBot."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .models import DealOutcome, DealRecord, SellerPersonalityType, TaskConfig
6
+
7
+
8
+ # ── Task Definitions ──────────────────────────────────────────────
9
+
10
# Registry of every task, keyed by task name. Each key is repeated as the
# TaskConfig ``name`` and has a same-named entry in GRADERS below.
TASKS: dict[str, TaskConfig] = {
    "single_deal": TaskConfig(
        name="single_deal",
        difficulty="easy",
        description=(
            "Buyer negotiates one deal. Symmetric information. No career history. "
            "Seller concedes at moderate rate."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.08,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.3,
    ),
    "asymmetric_pressure": TaskConfig(
        name="asymmetric_pressure",
        difficulty="medium",
        description=(
            "Buyer has hidden hard deadline at round 5. Seller has hidden inventory pressure. "
            "Agent must infer seller urgency from offer velocity and close before deadline."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.06,
        buyer_deadline=5,
        seller_inventory=5,
        seller_batna_probability=0.08,
        enable_career=False,
        success_threshold=0.4,
    ),
    "career_10": TaskConfig(
        name="career_10",
        difficulty="hard",
        description=(
            "Buyer plays 10 consecutive deals against same seller. Career history active. "
            "Seller adapts concession rate based on buyer's historical capitulation rate. "
            "Agent must manage reputation across episodes."
        ),
        # grade_career_10 derives rounds-per-episode as max_steps // total_episodes.
        max_steps=80,
        total_episodes=10,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.07,
        buyer_deadline=None,
        seller_inventory=10,
        seller_batna_probability=0.1,
        enable_career=True,
        success_threshold=0.5,
    ),
    # ── New personality-based tasks ──────────────────────────────
    "deceptive_seller": TaskConfig(
        name="deceptive_seller",
        difficulty="hard",
        description=(
            "Seller bluffs about demand, fakes urgency, anchors 15% higher. "
            "Tells leak deception cues -- verbal over-justification, fidgeting, "
            "erratic concessions. Agent must read through the bluffs."
        ),
        max_steps=10,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        # NOTE(review): the description says the anchor is 15% higher, but this
        # multiplier matches the other tasks; presumably the uplift is applied
        # by the personality logic -- confirm.
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.06,
        buyer_deadline=None,
        seller_inventory=3,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.35,
        seller_personality=SellerPersonalityType.DECEPTIVE,
        enable_tells=True,
    ),
    "impatient_seller": TaskConfig(
        name="impatient_seller",
        difficulty="medium",
        description=(
            "Seller concedes fast but walks fast. Shorter patience window. "
            "Agent must close quickly or risk losing the deal. "
            "Front-loaded concession pattern is the key tell."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.08,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.15,
        enable_career=False,
        success_threshold=0.3,
        seller_personality=SellerPersonalityType.IMPATIENT,
        enable_tells=True,
    ),
    "collaborative_seller": TaskConfig(
        name="collaborative_seller",
        difficulty="easy",
        description=(
            "Seller seeks fair deals, concedes toward midpoint. Lower anchor, "
            "tighter margins. Agent should reciprocate to maximize joint surplus. "
            "Tests whether agent adapts to cooperative opponents."
        ),
        max_steps=8,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.10,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.02,
        enable_career=False,
        success_threshold=0.4,
        seller_personality=SellerPersonalityType.COLLABORATIVE,
        enable_tells=True,
    ),
    "read_the_tells": TaskConfig(
        name="read_the_tells",
        difficulty="expert",
        description=(
            "Deceptive seller with strong tells. Agent gets bonus score for "
            "exploiting tells -- closing below midpoint when deception cues are high "
            "indicates the agent read the bluff. Game theory meets poker."
        ),
        # NOTE(review): career_10 sets max_steps to the total across all
        # episodes (80 for 10); here 10 steps cover 5 episodes -- confirm
        # whether max_steps is meant per episode for this task.
        max_steps=10,
        total_episodes=5,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.2,
        seller_concession_rate=0.05,
        buyer_deadline=None,
        seller_inventory=5,
        seller_batna_probability=0.08,
        enable_career=True,
        success_threshold=0.45,
        seller_personality=SellerPersonalityType.DECEPTIVE,
        enable_tells=True,
    ),
    "marketplace_arena": TaskConfig(
        name="marketplace_arena",
        difficulty="expert",
        description=(
            "Multi-buyer marketplace: 2-3 buyers compete for the same item from one seller. "
            "Buyers can signal cooperation or competition. "
            "Seller plays buyers against each other. Facebook Marketplace dynamics."
        ),
        max_steps=12,
        total_episodes=1,
        buyer_budget=100.0,
        seller_cost=30.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.06,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.3,
        seller_personality=SellerPersonalityType.DEFAULT,
        enable_tells=True,
        num_buyers=3,
        enable_coalition=True,
    ),
    "amazon_realistic": TaskConfig(
        name="amazon_realistic",
        difficulty="medium",
        description=(
            "Single-deal negotiation over a real Amazon listing. Item, MRP, and "
            "street price sampled per episode from data/amazon.csv. "
            "Forces generalization across product categories and price magnitudes."
        ),
        max_steps=8,
        total_episodes=1,
        # buyer_budget / seller_cost are ignored when use_real_listings=True;
        # kept here as fallbacks if the CSV is missing on the runtime.
        buyer_budget=1000.0,
        seller_cost=400.0,
        seller_anchor_multiplier=2.0,
        seller_concession_rate=0.08,
        buyer_deadline=None,
        seller_inventory=1,
        seller_batna_probability=0.05,
        enable_career=False,
        success_threshold=0.3,
        seller_personality=SellerPersonalityType.DEFAULT,
        enable_tells=True,
        use_real_listings=True,
    ),
}
208
+
209
+
210
+ # ── Graders ───────────────────────────────────────────────────────
211
+
212
def grade_single_deal(results: list[DealRecord], task: TaskConfig) -> float:
    """Score the first deal as the share of available surplus the buyer kept.

    Returns 0.0 when there are no results, the first record is not a closed
    deal, it has no agreed price, or the task budget does not exceed the
    seller cost. Otherwise returns
    ``(buyer_budget - agreed_price) / (buyer_budget - seller_cost)``
    clamped to [0, 1].
    """
    first = results[0] if results else None
    if first is None:
        return 0.0
    if first.outcome != DealOutcome.DEAL or first.agreed_price is None:
        return 0.0

    available = task.buyer_budget - task.seller_cost
    if available <= 0:
        return 0.0

    captured = (task.buyer_budget - first.agreed_price) / available
    return max(0.0, min(1.0, captured))
224
+
225
+
226
def grade_asymmetric_pressure(results: list[DealRecord], task: TaskConfig) -> float:
    """Score the first deal by buyer surplus, halved when it closed late.

    Walked or expired negotiations and records without an agreed price score
    0.0. The normalized surplus is multiplied by 1.0 when ``rounds_taken`` is
    within the deadline and by 0.5 otherwise, then clamped to [0, 1].
    """
    if not results:
        return 0.0

    deal = results[0]
    no_sale = deal.outcome in (DealOutcome.WALK, DealOutcome.EXPIRED)
    if no_sale or deal.agreed_price is None:
        return 0.0

    available = task.buyer_budget - task.seller_cost
    if available > 0:
        surplus_score = max(0.0, (task.buyer_budget - deal.agreed_price) / available)
    else:
        surplus_score = 0.0

    # A falsy deadline (None or 0) falls back to round 5.
    cutoff = task.buyer_deadline or 5
    multiplier = 1.0 if deal.rounds_taken <= cutoff else 0.5

    return max(0.0, min(1.0, surplus_score * multiplier))
246
+
247
+
248
def grade_career_10(results: list[DealRecord], task: TaskConfig) -> float:
    """Mean of surplus-times-efficiency over every deal in a career run.

    Each closed deal contributes ``normalized_surplus * efficiency``, where
    efficiency is ``1 - 0.3 * rounds_taken / rounds_per_episode`` floored at
    0. Records that are not closed deals, or lack a price, contribute 0.0.
    The mean is clamped to [0, 1].

    ``rounds_per_episode`` is ``max_steps // total_episodes``. Both the
    episode count and the quotient are floored at 1 so a task with
    ``total_episodes == 0`` or ``max_steps < total_episodes`` is graded
    instead of raising ZeroDivisionError.
    """
    if not results:
        return 0.0

    episodes = max(task.total_episodes, 1)
    rounds_per_ep = max(task.max_steps // episodes, 1)
    max_surplus = task.buyer_budget - task.seller_cost

    weighted_scores = []
    for deal in results:
        if deal.outcome != DealOutcome.DEAL or deal.agreed_price is None:
            weighted_scores.append(0.0)
            continue

        surplus = task.buyer_budget - deal.agreed_price
        norm_surplus = max(0.0, surplus / max_surplus) if max_surplus > 0 else 0.0

        efficiency = max(0.0, 1.0 - (deal.rounds_taken / rounds_per_ep) * 0.3)
        weighted_scores.append(norm_surplus * efficiency)

    score = sum(weighted_scores) / len(weighted_scores)
    return max(0.0, min(1.0, score))
269
+
270
+
271
def grade_personality_task(results: list[DealRecord], task: TaskConfig) -> float:
    """Generic personality-task grader: mean normalized buyer surplus per episode.

    Records that are not closed deals, or have no agreed price, count as 0.0.
    The mean is clamped to [0, 1].
    """
    if not results:
        return 0.0

    available = task.buyer_budget - task.seller_cost

    def episode_score(deal) -> float:
        if deal.outcome != DealOutcome.DEAL or deal.agreed_price is None:
            return 0.0
        if available > 0:
            return max(0.0, (task.buyer_budget - deal.agreed_price) / available)
        return 0.0

    per_episode = [episode_score(deal) for deal in results]
    mean_score = sum(per_episode) / max(len(per_episode), 1)
    return max(0.0, min(1.0, mean_score))
287
+
288
+
289
def grade_read_the_tells(results: list[DealRecord], task: TaskConfig) -> float:
    """Mean normalized surplus with a bonus for closing below the midpoint.

    Each closed deal scores its normalized buyer surplus. Deals closed below
    the budget/cost midpoint get up to 0.15 extra, scaled by how far below
    the midpoint the price landed, with the per-deal score capped at 1.0.
    Records that are not closed deals, or lack a price, score 0.0. The mean
    is clamped to [0, 1].

    When the budget does not exceed the seller cost there is no surplus to
    win, so the function returns 0.0. Without that guard the bonus divides by
    ``midpoint - seller_cost``, which is zero when budget equals cost.
    """
    if not results:
        return 0.0

    max_surplus = task.buyer_budget - task.seller_cost
    if max_surplus <= 0:
        return 0.0

    midpoint = (task.buyer_budget + task.seller_cost) / 2
    half_range = midpoint - task.seller_cost  # > 0 because max_surplus > 0
    scores = []

    for deal in results:
        if deal.outcome != DealOutcome.DEAL or deal.agreed_price is None:
            scores.append(0.0)
            continue

        norm = max(0.0, (task.buyer_budget - deal.agreed_price) / max_surplus)

        # Bonus for closing below midpoint (reading the bluff)
        if deal.agreed_price < midpoint:
            bluff_bonus = 0.15 * ((midpoint - deal.agreed_price) / half_range)
            norm = min(1.0, norm + bluff_bonus)

        scores.append(norm)

    return max(0.0, min(1.0, sum(scores) / len(scores)))
313
+
314
+
315
def grade_amazon_realistic(results: list[DealRecord], task: TaskConfig) -> float:
    """Grade a real-listing episode from the record's own normalized surplus.

    The score comes from ``normalized_surplus`` on the first record (computed
    per episode with that episode's seller cost, per the original note)
    rather than from the task's default budget and cost. It is clamped to
    [0, 1]. No results or a non-deal outcome score 0.0.
    """
    first = results[0] if results else None
    if first is None:
        return 0.0
    if first.outcome != DealOutcome.DEAL:
        return 0.0
    return max(0.0, min(1.0, first.normalized_surplus))
324
+
325
+
326
# Task name -> grader. Every grader takes (results, task) and returns a score
# in [0, 1]. Keys mirror TASKS; tasks without a bespoke grader share
# grade_personality_task.
GRADERS = {
    "single_deal": grade_single_deal,
    "asymmetric_pressure": grade_asymmetric_pressure,
    "career_10": grade_career_10,
    "deceptive_seller": grade_personality_task,
    "impatient_seller": grade_personality_task,
    "collaborative_seller": grade_personality_task,
    "read_the_tells": grade_read_the_tells,
    "marketplace_arena": grade_personality_task,
    "amazon_realistic": grade_amazon_realistic,
}
nlp/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .extractor import TellExtractor
2
+
3
+ __all__ = ["TellExtractor"]
nlp/eval_extractor.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Evaluate the NLP tell extractor against Chicago HAI human-labeled rows.
2
+
3
+ Compares ministral-3:3b zero-shot extraction to Chicago HAI ground-truth labels:
4
+ - firm_soft → verbal_confidence (binary: f=high, s=low)
5
+ - external_incentive=y → verbal_deception_cue (claim of outside pressure)
6
+ - category → loosely informs urgency/confidence
7
+
8
+ Also runs a rule-based control baseline (no LLM) for comparison.
9
+
10
+ Logs to runs/{ts}_extractor_eval/ via RunLogger.
11
+
12
+ Usage:
13
+ PYTHONPATH=. .venv/bin/python nlp/eval_extractor.py [--n 500] [--model ministral-3:3b]
14
+ """
15
+
16
+ import argparse
17
+ import json
18
+ import pathlib
19
+ import time
20
+ from collections import defaultdict
21
+
22
+ from nlp.extractor import TellExtractor, _condition_from_text, DEFAULT_TELL
23
+ from utils.run_logger import RunLogger
24
+
25
+ LABELED_ROWS = pathlib.Path("nlp/data/chicago_hai_bargaining.jsonl")
26
+
27
+
28
def load_labeled(min_len: int = 10, path=None) -> list[dict]:
    """Load Chicago HAI rows that have at least one human label.

    Args:
        min_len: minimum utterance length (in characters) for a row to be kept.
        path: JSONL file to read; defaults to ``LABELED_ROWS`` when omitted.

    Returns:
        The parsed rows, in file order, that carry a truthy ``category``,
        ``firm_soft`` or ``external_incentive`` value and whose utterance is
        at least ``min_len`` characters long.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    source = LABELED_ROWS if path is None else path
    label_keys = ("category", "firm_soft", "external_incentive")
    rows = []
    # The file is UTF-8 JSONL; do not rely on the platform default encoding.
    with open(source, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank separators and a trailing newline.
                continue
            r = json.loads(line)
            # .get so a row missing a label column counts as unlabeled
            # instead of raising KeyError.
            has_label = any(r.get(key) for key in label_keys)
            if has_label and len(r.get("utterance") or "") >= min_len:
                rows.append(r)
    return rows
38
+
39
+
40
def rule_based_predict(utterance: str) -> dict:
    """Control baseline: keyword-derived condition fields over DEFAULT_TELL.

    Only the three condition fields come from the utterance; every other
    field keeps its DEFAULT_TELL value.
    """
    score, depreciation, label = _condition_from_text(utterance)
    return {
        **DEFAULT_TELL,
        "condition_score": score,
        "depreciation_score": depreciation,
        "condition_label": label,
    }
48
+
49
+
50
def score_row(predicted: dict, gold: dict, row: dict) -> dict:
    """Score one prediction against the supervision values and raw labels.

    Always reports the absolute error on urgency, confidence and deception.
    Adds ``firm_correct`` (0/1) when the row has a firm/soft label, and
    ``deception_flagged`` (0/1) when the row is marked
    ``external_incentive == "y"``.
    """
    tracked = (
        ("urgency", "verbal_urgency"),
        ("confidence", "verbal_confidence"),
        ("deception", "verbal_deception_cue"),
    )
    out = {f"abs_err_{short}": abs(predicted[field] - gold[field]) for short, field in tracked}

    # Firm/soft accuracy: label "f" is firm; a prediction is firm at confidence >= 0.5.
    firm_label = row["firm_soft"]
    if firm_label:
        predicted_firm = predicted["verbal_confidence"] >= 0.5
        out["firm_correct"] = int((firm_label == "f") == predicted_firm)

    # Recall on external-incentive rows: flagged when deception cue >= 0.4.
    if row["external_incentive"] == "y":
        out["deception_flagged"] = int(predicted["verbal_deception_cue"] >= 0.4)

    return out
69
+
70
+
71
def aggregate(per_row: list[dict]) -> dict:
    """Average every numeric field across the per-row score dicts.

    Booleans and non-numeric values are ignored. Each key ``k`` that appears
    with a numeric value is reported as ``mean_k``, rounded to 4 places.
    """
    columns = {}
    for scored in per_row:
        for key, value in scored.items():
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                continue
            columns.setdefault(key, []).append(value)

    means = {}
    for key, values in columns.items():
        if values:
            means[f"mean_{key}"] = round(sum(values) / len(values), 4)
    return means
79
+
80
+
81
def run_pass(rows: list[dict], predict_fn, name: str, log) -> dict:
    """Run one extraction pass over labeled rows and summarize the scores.

    Args:
        rows: labeled rows; each needs ``utterance`` and ``tell_supervision``
            plus the label fields ``score_row`` reads.
        predict_fn: callable mapping an utterance to a tell dict.
        name: label for this pass, used in console output and logged metrics.
        log: logger exposing ``metric(dict)``.

    Returns:
        The ``aggregate`` summary of all scored rows, plus ``n`` (rows
        actually scored) and ``elapsed_s``.
    """
    total = len(rows)
    print(f"\n[{name}] running on {total} rows ...")
    per_row = []
    # perf_counter is monotonic and high-resolution. A coarse wall clock can
    # report zero elapsed time after 50 fast rule-based rows.
    t0 = time.perf_counter()
    for i, row in enumerate(rows):
        try:
            pred = predict_fn(row["utterance"])
        except Exception as e:
            # Best effort: a failed prediction skips the row, not the pass.
            print(f" ! row {i} failed: {e}")
            continue

        scored = score_row(pred, row["tell_supervision"], row)
        log.metric({
            **scored,
            "pass": name,
            "row_idx": i,
            "utterance_preview": row["utterance"][:80],
        })
        per_row.append(scored)

        if (i + 1) % 50 == 0:
            elapsed = time.perf_counter() - t0
            if elapsed > 0:  # no rate/ETA report without measurable time
                rate = (i + 1) / elapsed
                eta = (total - i - 1) / rate
                print(f" [{i+1}/{total}] {rate:.2f} rows/s ETA {eta:.0f}s")

    elapsed = time.perf_counter() - t0
    print(f"[{name}] done in {elapsed:.1f}s")
    summary = aggregate(per_row)
    summary["n"] = len(per_row)
    summary["elapsed_s"] = round(elapsed, 1)
    return summary
114
+
115
+
116
def main():
    """CLI entry point: compare the rule-based baseline with the LLM extractor.

    Loads the labeled rows, optionally samples ``--n`` of them with the given
    seed, runs both passes under a RunLogger, and prints and logs the
    per-pass summaries and their mean deltas.
    """
    import random

    cli = argparse.ArgumentParser()
    cli.add_argument("--n", type=int, default=500, help="Cap on labeled rows")
    cli.add_argument("--model", type=str, default="ministral-3:3b")
    cli.add_argument("--seed", type=int, default=42)
    args = cli.parse_args()

    random.seed(args.seed)

    rows = load_labeled()
    print(f"Loaded {len(rows)} labeled rows from Chicago HAI")
    cap = args.n
    if cap and cap < len(rows):
        rows = random.sample(rows, cap)
        print(f"Sampled {cap} rows for eval")

    extractor = TellExtractor(model=args.model)

    with RunLogger("extractor_eval") as log:
        log.config({
            "model": args.model,
            "n_rows": len(rows),
            "seed": args.seed,
            "labeled_source": "chicago_hai_bargaining.jsonl",
        })

        baseline = run_pass(rows, rule_based_predict, "rule_based", log)
        llm = run_pass(rows, extractor.extract, args.model, log)

        # Model-minus-baseline difference for every mean_* metric both passes report.
        deltas = {}
        for key in baseline:
            if key.startswith("mean_") and key in llm:
                deltas[key.replace("mean_", "delta_")] = llm[key] - baseline[key]

        comparison = {
            "rule_based": baseline,
            args.model: llm,
            "deltas": deltas,
        }
        log.summary(comparison)

    print("\n=== SUMMARY ===")
    print(json.dumps(comparison, indent=2))
    print(f"\nFull logs: {log.dir}")
159
+
160
+
161
+ if __name__ == "__main__":
162
+ main()
nlp/extractor.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """NLP Tell Extractor — reads seller utterances, returns TellObservation.
2
+
3
+ Uses a local Ollama model (default: ministral-3:3b) to extract structured signals
4
+ from free-text seller messages. Output schema matches TellObservation in
5
+ bazaarbot_env/models.py — same fields, same ranges.
6
+
7
+ The extractor runs as a post-processing step after the seller speaks. For the
8
+ rule-based seller it's a cross-check; for the LLM seller it's the primary
9
+ tell source.
10
+
11
+ Usage:
12
+ from nlp.extractor import TellExtractor
13
+ extractor = TellExtractor()
14
+ tells = extractor.extract("bhai last price hai, kal se badhega", history=[...])
15
+
16
+ Standalone test:
17
+ python nlp/extractor.py
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import re
24
+ import textwrap
25
+ from typing import Optional
26
+ import requests
27
+
28
+
29
+ OLLAMA_URL = "http://localhost:11434/api/generate"
30
+ DEFAULT_MODEL = "ministral-3:3b"
31
+
32
+ # ── Condition vocabulary ──────────────────────────────────────────
33
+
34
+ # eBay standardized grades β†’ (condition_score, depreciation_score, label)
35
+ CONDITION_GRADES: list[tuple[list[str], float, float, str]] = [
36
+ (
37
+ ["new", "sealed", "mint", "mib", "mint in box", "brand new", "unused",
38
+ "box band", "seal pack", "sealed pack", "never opened", "factory sealed"],
39
+ 1.0, 0.0, "new",
40
+ ),
41
+ (
42
+ ["like new", "open box", "barely used", "3 months", "6 months",
43
+ "thoda use", "thoda sa use", "bilkul sahi", "almost new", "excellent"],
44
+ 0.85, 0.10, "like_new",
45
+ ),
46
+ (
47
+ ["very good", "vgc", "minor scratch", "ek chhota scratch", "small scratch",
48
+ "light scratch", "minor wear", "slight", "good condition"],
49
+ 0.70, 0.25, "very_good",
50
+ ),
51
+ (
52
+ ["good", "guc", "some scratches", "few scratches", "normal wear",
53
+ "works perfectly", "fully functional", "theek kaam", "sahi kaam"],
54
+ 0.55, 0.40, "good",
55
+ ),
56
+ (
57
+ ["acceptable", "heavy scratch", "dent", "battery low", "battery thodi kam",
58
+ "screen crack", "needs repair", "rough", "worn", "purana hai"],
59
+ 0.35, 0.60, "acceptable",
60
+ ),
61
+ (
62
+ ["for parts", "broken", "dead", "not working", "kharab", "kaam nahi karta",
63
+ "damaged", "junk"],
64
+ 0.10, 0.90, "junk",
65
+ ),
66
+ ]
67
+
68
+
69
+ def _condition_from_text(text: str) -> tuple[float, float, str]:
70
+ """Rule-based fast pass for condition signals before LLM extraction."""
71
+ lower = text.lower()
72
+ for keywords, score, dep, label in CONDITION_GRADES:
73
+ for kw in keywords:
74
+ if kw in lower:
75
+ return score, dep, label
76
+ return 1.0, 0.0, "unknown"
77
+
78
+
79
+ # ── Hinglish few-shot examples for the extractor prompt ──────────
80
+
81
# Few-shot pairs in the exact "Utterance / Tells" layout the model is asked to
# continue. Each Tells line is a complete JSON object with all nine fields.
HINGLISH_FEW_SHOTS = """
Utterance: "bhai last price hai, kal se price badhega"
Tells: {"verbal_urgency": 0.75, "verbal_confidence": 0.6, "verbal_deception_cue": 0.5, "offer_speed": "instant", "concession_pattern": "stalling", "emotional_escalation": 0.3, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}

Utterance: "ek chhota sa scratch hai screen pe, baaki sab bilkul theek hai"
Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.6, "verbal_deception_cue": 0.2, "offer_speed": "deliberate", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 0.7, "depreciation_score": 0.25, "condition_label": "very_good"}

Utterance: "abhi teen aur log dekh rahe hain, aaj hi lena padega"
Tells: {"verbal_urgency": 0.8, "verbal_confidence": 0.7, "verbal_deception_cue": 0.75, "offer_speed": "instant", "concession_pattern": "stalling", "emotional_escalation": 0.4, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}

Utterance: "battery thodi kam hai, 79% hai, magar charger ke saath deta hoon"
Tells: {"verbal_urgency": 0.2, "verbal_confidence": 0.5, "verbal_deception_cue": 0.1, "offer_speed": "deliberate", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 0.35, "depreciation_score": 0.6, "condition_label": "acceptable"}

Utterance: "box band hai, seal packed, maine khola bhi nahi"
Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.8, "verbal_deception_cue": 0.0, "offer_speed": "normal", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "new"}
""".strip()

# Chicago HAI examples (English formal negotiations)
CHICAGO_FEW_SHOTS = """
Utterance: "I have another buyer coming in an hour, this is my final offer"
Tells: {"verbal_urgency": 0.7, "verbal_confidence": 0.75, "verbal_deception_cue": 0.65, "offer_speed": "instant", "concession_pattern": "stalling", "emotional_escalation": 0.3, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}

Utterance: "Minor scratches on the back, fully functional, battery health 81%"
Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.6, "verbal_deception_cue": 0.15, "offer_speed": "deliberate", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 0.55, "depreciation_score": 0.4, "condition_label": "good"}

Utterance: "Okay fine, I can do 4500, but that is absolutely the lowest I'll go"
Tells: {"verbal_urgency": 0.5, "verbal_confidence": 0.55, "verbal_deception_cue": 0.3, "offer_speed": "deliberate", "concession_pattern": "front_loaded", "emotional_escalation": 0.35, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}

Utterance: "MIB, never opened, still has the plastic wrap on it"
Tells: {"verbal_urgency": 0.1, "verbal_confidence": 0.85, "verbal_deception_cue": 0.0, "offer_speed": "normal", "concession_pattern": "steady", "emotional_escalation": 0.0, "condition_score": 1.0, "depreciation_score": 0.0, "condition_label": "unknown"}
""".strip().replace(
    # The sealed-item example is labeled "new", matching the Hinglish sealed example.
    '"depreciation_score": 0.0, "condition_label": "unknown"}\n\nUtterance: "MIB',
    '"depreciation_score": 0.0, "condition_label": "unknown"}\n\nUtterance: "MIB',
).rsplit('"condition_label": "unknown"}', 1)[0] + '"condition_label": "new"}'


# System prompt sent ahead of every extraction request.
#
# The template is dedented BEFORE the few-shot blocks are substituted. The
# few-shot text starts at column 0, so dedenting the already-interpolated
# string finds no common margin and leaves the source indentation in the
# prompt. Doubled braces are literal braces for str.format.
EXTRACTION_SYSTEM_PROMPT = textwrap.dedent("""\
    You extract structured negotiation signals from a seller's message.
    Output ONLY a single valid JSON object. No prose, no markdown, no explanation.

    Output schema (all fields required):
    {{
      "verbal_urgency": <0.0–1.0, how desperate/pressured the seller sounds>,
      "verbal_confidence": <0.0–1.0, how assertive/firm the seller sounds>,
      "verbal_deception_cue": <0.0–1.0, signs of bluffing: social proof claims, fake scarcity, over-justification>,
      "offer_speed": <"instant"|"normal"|"deliberate">,
      "concession_pattern": <"steady"|"front_loaded"|"stalling"|"erratic">,
      "emotional_escalation": <0.0–1.0, how emotionally charged the message is>,
      "condition_score": <0.0–1.0, item condition from 0=junk to 1=mint. 1.0 if no condition info>,
      "depreciation_score": <0.0–1.0, wear/damage level. 0.0 if no condition info>,
      "condition_label": <"new"|"like_new"|"very_good"|"good"|"acceptable"|"junk"|"unknown">
    }}

    Calibration rules:
    - Social proof ("another buyer", "3 log dekh rahe", "bahut demand hai") → verbal_deception_cue ≥ 0.6
    - "Final price", "last offer", "bilkul nahi jaaunga" → verbal_confidence ≥ 0.7
    - Time pressure claims ("kal se badhega", "aaj hi") → verbal_urgency ≥ 0.65
    - Condition disclosures lower condition_score from 1.0; no disclosure = keep 1.0
    - "Firm" language = verbal_confidence ≥ 0.75; "Soft/flexible" = ≤ 0.35

    Examples (Hinglish):
    {hinglish}

    Examples (English):
    {english}
    """).format(hinglish=HINGLISH_FEW_SHOTS, english=CHICAGO_FEW_SHOTS)
144
+
145
+ DEFAULT_TELL = {
146
+ "verbal_urgency": 0.2,
147
+ "verbal_confidence": 0.5,
148
+ "verbal_deception_cue": 0.0,
149
+ "offer_speed": "normal",
150
+ "concession_pattern": "steady",
151
+ "emotional_escalation": 0.0,
152
+ "condition_score": 1.0,
153
+ "depreciation_score": 0.0,
154
+ "condition_label": "unknown",
155
+ }
156
+
157
+ VALID_OFFER_SPEEDS = {"instant", "normal", "deliberate"}
158
+ VALID_CONCESSION_PATTERNS = {"steady", "front_loaded", "stalling", "erratic"}
159
+ VALID_CONDITION_LABELS = {"new", "like_new", "very_good", "good", "acceptable", "junk", "unknown"}
160
+
161
+
162
+ def _clamp(v, lo=0.0, hi=1.0) -> float:
163
+ try:
164
+ return max(lo, min(hi, float(v)))
165
+ except (TypeError, ValueError):
166
+ return (lo + hi) / 2
167
+
168
+
169
+ def _parse_extraction(raw: str) -> dict:
170
+ """Parse JSON from LLM output, clamp ranges, fill missing fields."""
171
+ s = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
172
+ if "```" in s:
173
+ parts = s.split("```")
174
+ s = parts[1].lstrip("json").strip() if len(parts) >= 2 else s
175
+ start, end = s.find("{"), s.rfind("}") + 1
176
+ if start >= 0 and end > start:
177
+ s = s[start:end]
178
+ try:
179
+ parsed = json.loads(s)
180
+ except Exception:
181
+ return dict(DEFAULT_TELL)
182
+
183
+ out = dict(DEFAULT_TELL)
184
+ out["verbal_urgency"] = _clamp(parsed.get("verbal_urgency", out["verbal_urgency"]))
185
+ out["verbal_confidence"] = _clamp(parsed.get("verbal_confidence", out["verbal_confidence"]))
186
+ out["verbal_deception_cue"] = _clamp(parsed.get("verbal_deception_cue", out["verbal_deception_cue"]))
187
+ out["emotional_escalation"] = _clamp(parsed.get("emotional_escalation", out["emotional_escalation"]))
188
+ out["condition_score"] = _clamp(parsed.get("condition_score", out["condition_score"]))
189
+ out["depreciation_score"] = _clamp(parsed.get("depreciation_score", out["depreciation_score"]))
190
+
191
+ speed = parsed.get("offer_speed", "normal")
192
+ out["offer_speed"] = speed if speed in VALID_OFFER_SPEEDS else "normal"
193
+
194
+ pattern = parsed.get("concession_pattern", "steady")
195
+ out["concession_pattern"] = pattern if pattern in VALID_CONCESSION_PATTERNS else "steady"
196
+
197
+ label = parsed.get("condition_label", "unknown")
198
+ out["condition_label"] = label if label in VALID_CONDITION_LABELS else "unknown"
199
+
200
+ return out
201
+
202
+
203
class TellExtractor:
    """Extracts TellObservation fields from seller free text via Ollama."""

    def __init__(self, model: str = DEFAULT_MODEL, ollama_url: str = OLLAMA_URL):
        self.model = model
        self.ollama_url = ollama_url

    @staticmethod
    def _defaults_with_condition(condition: tuple) -> dict:
        """Return DEFAULT_TELL with the three condition fields overridden."""
        score, depreciation, label = condition
        tells = dict(DEFAULT_TELL)
        tells["condition_score"] = score
        tells["depreciation_score"] = depreciation
        tells["condition_label"] = label
        return tells

    def _call_ollama(self, prompt: str) -> str:
        """POST the prompt to Ollama; return the response text or "" on any failure."""
        request_body = {
            "model": self.model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.1, "num_predict": 256},
        }
        try:
            reply = requests.post(self.ollama_url, json=request_body, timeout=120)
            reply.raise_for_status()
            return reply.json().get("response", "")
        except Exception as e:
            # Best effort: callers treat "" as "use the rule-based fallback".
            print(f"[extractor] Ollama call failed: {e}")
            return ""

    def extract(
        self,
        message: str,
        history: Optional[list[str]] = None,
        fast: bool = False,
    ) -> dict:
        """Extract tell signals from a seller utterance.

        Args:
            message: the seller's current utterance
            history: earlier turns as strings; only the last three are sent
            fast: if True, skip the LLM and return defaults plus the
                rule-based condition fields

        Returns:
            dict matching TellObservation field names
        """
        condition = _condition_from_text(message)
        if fast:
            return self._defaults_with_condition(condition)

        context = ""
        if history:
            context = "\nRecent conversation:\n" + "\n".join(history[-3:]) + "\n"

        user_prompt = f'{context}\nSeller says: "{message}"\n\nExtract tells as JSON:'
        raw = self._call_ollama(EXTRACTION_SYSTEM_PROMPT + "\n\n" + user_prompt)

        if not raw:
            # Empty or failed reply: fall back to the rule-based result.
            return self._defaults_with_condition(condition)

        tells = _parse_extraction(raw)

        # A keyword hit on an explicit condition phrase overrides whatever
        # the LLM inferred for the condition fields; the LLM output is kept
        # for the tone-dependent fields.
        if condition[2] != "unknown":
            (
                tells["condition_score"],
                tells["depreciation_score"],
                tells["condition_label"],
            ) = condition

        return tells

    def batch_extract(self, messages: list[str]) -> list[dict]:
        """Run ``extract`` on each message independently (no history, LLM path)."""
        return list(map(self.extract, messages))
288
+
289
+
290
+ # ── Standalone test ───────────────────────────────────────────────
291
+
292
# Sample seller messages for the standalone demo: Hinglish and English, with
# pressure, condition-disclosure and collaborative tones.
TEST_UTTERANCES = [
    # Hinglish urgency + social proof (deceptive)
    "bhai last price hai, abhi teen aur log dekh rahe hain",
    # Hinglish condition disclosure
    "ek chhota sa scratch hai screen pe, battery 81% hai, baaki sab theek",
    # Hinglish sealed
    "box band hai, seal packed, maine kabhi khola nahi",
    # English deceptive pressure
    "I have another buyer coming in an hour, this is my absolute final offer",
    # English condition
    "Minor scratches on the back panel, fully functional, screen is perfect",
    # English collaborative (em dash restored from mis-decoded bytes)
    "Look, I'll be honest with you — I paid 8000 for it, I just need 6500 to break even",
    # eBay lingo
    "MIB, never opened, still has factory seal",
    # Impatient
    "6000. Yes or no. I don't have all day.",
]
310
+
311
# Standalone demo: run the default extractor over TEST_UTTERANCES and print the
# headline fields for each utterance. Goes through the LLM path
# (extract(..., fast=False)).
if __name__ == "__main__":
    extractor = TellExtractor()
    print(f"Using model: {extractor.model}\n")
    print("=" * 60)

    for utt in TEST_UTTERANCES:
        print(f"Utterance: {utt}")
        tells = extractor.extract(utt)
        # First line: tone signals. Second line: item-condition signals.
        print(f" urgency={tells['verbal_urgency']:.2f} "
              f"confidence={tells['verbal_confidence']:.2f} "
              f"deception={tells['verbal_deception_cue']:.2f} "
              f"speed={tells['offer_speed']}")
        print(f" condition={tells['condition_label']} "
              f"score={tells['condition_score']:.2f} "
              f"depreciation={tells['depreciation_score']:.2f}")
        print()
nlp/fetch_datasets.py ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Fetch and cache negotiation datasets used for NLP extractor supervision.
2
+
3
+ Datasets:
4
+ 1. CraigslistBargain (stanfordnlp/craigslist_bargains; loaded from local CodaLab dumps) — the dumps have no per-turn intent labels, so tells are derived from the event action type
5
+ 2. ChicagoHAI/language-of-bargaining — per-turn bargaining act + Firm/Soft + External Incentive
6
+ 3. casino — multi-issue strategy annotations
7
+
8
+ Run:
9
+ PYTHONPATH=. python nlp/fetch_datasets.py   (run from the repo root so the optional `nlp.extractor` import resolves)
10
+
11
+ Outputs written to nlp/data/:
12
+ craigslist_bargains.jsonl
13
+ chicago_hai_bargaining.jsonl
14
+ casino.jsonl
15
+ extractor_supervision.jsonl ← merged supervision set for NLP extractor fine-tune
16
+ """
17
+
18
+ import json
19
+ import pathlib
20
+ from datasets import load_dataset
21
+
22
+ OUT = pathlib.Path(__file__).parent / "data"
23
+ OUT.mkdir(exist_ok=True)
24
+
25
+
26
+ # ── Chicago HAI: Category β†’ verbal tell mapping ───────────────────
27
+ # Derived from ACL 2023 paper taxonomy
28
+ CHICAGO_CATEGORY_MAP = {
29
+ "offer": {"verbal_urgency": 0.2, "verbal_confidence": 0.7},
30
+ "counter-offer": {"verbal_urgency": 0.3, "verbal_confidence": 0.6},
31
+ "accept": {"verbal_urgency": 0.1, "verbal_confidence": 0.8},
32
+ "reject": {"verbal_urgency": 0.4, "verbal_confidence": 0.5},
33
+ "information": {"verbal_urgency": 0.1, "verbal_confidence": 0.6},
34
+ "threat": {"verbal_urgency": 0.7, "verbal_confidence": 0.8},
35
+ "appeal": {"verbal_urgency": 0.5, "verbal_confidence": 0.4},
36
+ "other": {"verbal_urgency": 0.2, "verbal_confidence": 0.5},
37
+ }
38
+
39
+ FIRM_SOFT_MAP = {
40
+ "Firm": 0.85,
41
+ "Soft": 0.25,
42
+ "": 0.5,
43
+ }
44
+
45
+ # Non-empty External Incentive = social proof / bluff signal
46
+ EXTERNAL_INCENTIVE_DECEPTION = 0.65
47
+
48
+
49
+ # ── CaSiNo: strategy β†’ tell mapping ──────────────────────────────
50
+ # CaSiNo annotates with: no-need, self-need, other-need, vouch-fair,
51
+ # showing-concern, no-deal, coordination, empathy, small-talk
52
+ CASINO_STRATEGY_MAP = {
53
+ "no-need": {"verbal_urgency": 0.1, "verbal_deception_cue": 0.3},
54
+ "self-need": {"verbal_urgency": 0.6, "verbal_deception_cue": 0.1},
55
+ "other-need": {"verbal_urgency": 0.3, "verbal_deception_cue": 0.4},
56
+ "vouch-fair": {"verbal_urgency": 0.2, "verbal_confidence": 0.7},
57
+ "showing-concern": {"verbal_urgency": 0.3, "verbal_confidence": 0.4},
58
+ "no-deal": {"verbal_urgency": 0.5, "verbal_confidence": 0.8},
59
+ "coordination": {"verbal_urgency": 0.2, "verbal_confidence": 0.6},
60
+ "empathy": {"verbal_urgency": 0.2, "verbal_confidence": 0.5},
61
+ "small-talk": {"verbal_urgency": 0.05, "verbal_confidence": 0.5},
62
+ }
63
+
64
+
65
def _default_tell() -> dict:
    """Return a fresh neutral tell dict used as the baseline for every row.

    A new dict is built on each call, so callers may mutate the result.
    """
    return dict(
        verbal_urgency=0.2,
        verbal_confidence=0.5,
        verbal_deception_cue=0.0,
        condition_score=1.0,
        depreciation_score=0.0,
        condition_label="unknown",
    )
74
+
75
+
76
def fetch_craigslist():
    """Build per-utterance supervision rows from local CraigslistBargain dumps.

    Reads ``data/train.json`` and ``data/dev.json`` relative to the current
    working directory. Each file may be gzipped or plain JSON; a missing
    split is reported and skipped. Only ``message`` events whose payload is
    a string of at least 5 characters become rows.

    Source: https://worksheets.codalab.org/worksheets/0x453913e76b65495d8b9730d41c7e0a0c
    Schema: events list with action in {message, offer, accept, reject, quit}.

    Returns:
        A list of row dicts with keys ``source``, ``role``, ``utterance``,
        ``action``, ``deal_price`` and ``tell_supervision``. The same rows
        are written to ``OUT / "craigslist_bargains.jsonl"`` as UTF-8 JSONL.
    """
    import gzip

    # The dumps carry no per-turn intent labels, so verbal tells are derived
    # from the event's action type.
    # NOTE: only "message" events survive the filter below, and the "message"
    # entry equals the defaults from _default_tell(), so these rows carry
    # default verbal tells.
    ACTION_TELL_MAP = {
        "message": {"verbal_urgency": 0.2, "verbal_confidence": 0.5},
        "offer": {"verbal_urgency": 0.35, "verbal_confidence": 0.7},
        "accept": {"verbal_urgency": 0.1, "verbal_confidence": 0.8},
        "reject": {"verbal_urgency": 0.45, "verbal_confidence": 0.55},
        "quit": {"verbal_urgency": 0.6, "verbal_confidence": 0.6},
    }

    # Optional helper: condition signals are only derived when the extractor
    # module is importable. Resolved once instead of once per message.
    try:
        from nlp.extractor import _condition_from_text
    except ImportError:
        _condition_from_text = None

    rows = []
    for split in ("train", "dev"):
        path = pathlib.Path(f"data/{split}.json")
        if not path.exists():
            print(f" ! data/{split}.json not found, skipping")
            continue
        print(f" Loading data/{split}.json ...")

        # Detect gzip by its magic bytes rather than by catching whatever
        # exception a failed gzip read happens to raise.
        raw = path.read_bytes()
        if raw[:2] == b"\x1f\x8b":
            raw = gzip.decompress(raw)
        examples = json.loads(raw)

        for ex in examples:
            scenario = ex.get("scenario") or {}
            kbs = scenario.get("kbs") or []

            # Map agent index -> role using each kb's position in the list.
            # The previous code wrote "buyer" to index 0 and "seller" to
            # index 1 regardless of which kb held which role.
            # NOTE(review): assumes kbs is ordered by agent index - confirm
            # against the dump format.
            agent_roles = {}
            for idx, kb in enumerate(kbs):
                kb_role = ((kb or {}).get("personal") or {}).get("Role", "")
                if kb_role in ("buyer", "seller"):
                    agent_roles[idx] = kb_role

            outcome = ex.get("outcome") or {}
            deal_price = (outcome.get("offer") or {}).get("price")

            # Condition signals come from the listing description, which is
            # the same for every turn of the dialogue: compute them once.
            first_kb = (kbs[0] if kbs else None) or {}
            item = first_kb.get("item") or {}
            desc = " ".join(item.get("Description") or [])
            condition = {}
            if desc and _condition_from_text is not None:
                cond_score, dep_score, cond_label = _condition_from_text(desc)
                if cond_label != "unknown":
                    condition = {
                        "condition_score": cond_score,
                        "depreciation_score": dep_score,
                        "condition_label": cond_label,
                    }

            for ev in ex.get("events") or []:
                action = ev.get("action", "")
                text = ev.get("data", "")
                if action != "message" or not isinstance(text, str) or len(text) < 5:
                    continue

                role = agent_roles.get(ev.get("agent", 0), "unknown")

                tell = _default_tell()
                tell.update(ACTION_TELL_MAP.get(action, {}))
                tell.update(condition)

                rows.append({
                    "source": "craigslist_bargains",
                    "role": role,
                    "utterance": text,
                    "action": action,
                    "deal_price": deal_price,
                    "tell_supervision": tell,
                })

    out_path = OUT / "craigslist_bargains.jsonl"
    # ensure_ascii=False emits raw non-ASCII text, so pin the file encoding.
    with open(out_path, "w", encoding="utf-8") as f:
        for r in rows:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")
    print(f" → {len(rows)} turns written to {out_path}")
    return rows
200
+
201
+
202
def fetch_chicago_hai():
    """Build per-turn supervision rows from the local Chicago HAI release.

    Loads every ``*.json`` file under ``negotiations_public_release/nl``
    (relative to the current working directory). The HF version is broken,
    so the locally unzipped data is used instead.

    Label taxonomy (from data exploration):
        Category: p=price, n=new-offer, c=counter, r=reject, a=accept, e=exit
        Firm or Soft: f=firm, s=soft
        External Incentive: y=yes (social proof / outside pressure claim)

    Returns:
        A list of row dicts with keys ``source``, ``role``, ``utterance``,
        ``category``, ``firm_soft``, ``external_incentive`` and
        ``tell_supervision``; also written to
        ``OUT / "chicago_hai_bargaining.jsonl"`` as UTF-8 JSONL. Returns
        ``[]`` when the data directory is missing.
    """
    nl_dir = pathlib.Path("negotiations_public_release/nl")
    if not nl_dir.exists():
        print(" ! negotiations_public_release/nl not found, skipping Chicago HAI")
        return []

    # Glob once and reuse the listing for both the count and the loop.
    json_files = sorted(nl_dir.glob("*.json"))
    print(f"Loading Chicago HAI from {nl_dir} ({len(json_files)} files) ...")

    CATEGORY_MAP = {
        "p": {"verbal_urgency": 0.3, "verbal_confidence": 0.7},    # price proposal
        "n": {"verbal_urgency": 0.4, "verbal_confidence": 0.65},   # new offer
        "c": {"verbal_urgency": 0.35, "verbal_confidence": 0.6},   # counter
        "r": {"verbal_urgency": 0.5, "verbal_confidence": 0.5},    # reject
        "a": {"verbal_urgency": 0.1, "verbal_confidence": 0.8},    # accept
        "e": {"verbal_urgency": 0.6, "verbal_confidence": 0.7},    # exit/walk
    }
    FIRM_MAP = {"f": 0.80, "s": 0.25}

    rows = []
    skipped = 0
    for fpath in json_files:
        try:
            raw = fpath.read_text(encoding="utf-8")
            # The files contain bare NaN for missing labels. Map every
            # non-finite constant to None so missing labels are falsy,
            # wherever they appear in the document.
            d = json.loads(raw, parse_constant=lambda _name: None)
        except (OSError, ValueError) as exc:
            # ValueError covers both JSON and Unicode decode errors.
            skipped += 1
            print(f" ! skipping {fpath.name}: {exc}")
            continue

        for turn_words in d.get("turns", []):
            if not isinstance(turn_words, list) or not turn_words:
                continue

            # Reconstruct utterance by joining Word fields
            utterance = " ".join(
                str(w["Word"]) for w in turn_words if w.get("Word")
            ).strip()
            if len(utterance) < 5:
                continue

            role = turn_words[0].get("Role", "")

            # Take labels from last word that has them (annotation is span-level)
            category, firm_soft, ext_incentive = "", "", ""
            for w in reversed(turn_words):
                if not category and w.get("Category"):
                    category = str(w["Category"]).strip()
                if not firm_soft and w.get("Firm or Soft"):
                    firm_soft = str(w["Firm or Soft"]).strip()
                if not ext_incentive and w.get("External Incentive"):
                    ext_incentive = str(w["External Incentive"]).strip()

            tell = _default_tell()
            tell.update(CATEGORY_MAP.get(category, {}))
            # An explicit firm/soft label overrides the category's confidence.
            if firm_soft in FIRM_MAP:
                tell["verbal_confidence"] = FIRM_MAP[firm_soft]
            if ext_incentive == "y":
                tell["verbal_deception_cue"] = EXTERNAL_INCENTIVE_DECEPTION

            rows.append({
                "source": "chicago_hai",
                "role": role,
                "utterance": utterance,
                "category": category,
                "firm_soft": firm_soft,
                "external_incentive": ext_incentive,
                "tell_supervision": tell,
            })

    if skipped:
        print(f" ! {skipped} file(s) could not be parsed and were skipped")

    out_path = OUT / "chicago_hai_bargaining.jsonl"
    # ensure_ascii=False emits raw non-ASCII text, so pin the file encoding.
    with open(out_path, "w", encoding="utf-8") as f:
        for r in rows:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")
    print(f" → {len(rows)} turns written to {out_path}")
    return rows
281
+
282
+
283
def fetch_casino():
    """Build per-utterance supervision rows from the CaSiNo dataset.

    Loads the ``casino`` dataset via ``datasets.load_dataset`` and emits one
    row per non-empty chat turn. When the example carries per-utterance
    strategy annotations, the first label that appears in
    ``CASINO_STRATEGY_MAP`` is applied to the turn's tell; otherwise the
    turn keeps the default tell and an empty ``strategy``.

    Returns:
        A list of row dicts with keys ``source``, ``role``, ``utterance``,
        ``strategy`` and ``tell_supervision``; also written to
        ``OUT / "casino.jsonl"`` as UTF-8 JSONL. Returns ``[]`` if the
        dataset cannot be loaded.
    """
    print("Fetching casino (CaSiNo) ...")
    try:
        # NOTE(security): trust_remote_code=True lets the dataset repository's
        # loading script execute locally; keep it only for trusted sources.
        ds = load_dataset("casino", trust_remote_code=True)
    except Exception as e:  # best-effort: any load failure skips this source
        print(f" ! Could not load: {e}")
        return []

    rows = []
    for split in ds.keys():
        for ex in ds[split]:
            # CaSiNo per-turn strategy is in annotations, not task_data
            # (task_data contains item allocation info, not strategy labels).
            # NOTE(review): assumes ex["annotations"] is a list of
            # [utterance, "label1,label2"] pairs - confirm against the
            # dataset card. Malformed or missing entries are ignored.
            labels_by_utterance = {}
            for pair in ex.get("annotations") or []:
                if (
                    isinstance(pair, (list, tuple))
                    and len(pair) >= 2
                    and isinstance(pair[0], str)
                ):
                    labels_by_utterance.setdefault(pair[0].strip(), str(pair[1] or ""))

            for turn in ex.get("chat_logs") or []:
                utt = turn.get("text", "")
                if not utt:
                    continue

                role = turn.get("id", "")

                # Use the first annotated label that has a tell mapping.
                raw_labels = labels_by_utterance.get(utt.strip(), "")
                strategy_label = next(
                    (
                        label
                        for label in (part.strip() for part in raw_labels.split(","))
                        if label in CASINO_STRATEGY_MAP
                    ),
                    "",
                )

                tell = _default_tell()
                tell.update(CASINO_STRATEGY_MAP.get(strategy_label, {}))

                rows.append({
                    "source": "casino",
                    "role": role,
                    "utterance": utt,
                    "strategy": strategy_label,
                    "tell_supervision": tell,
                })

    out_path = OUT / "casino.jsonl"
    # ensure_ascii=False emits raw non-ASCII text, so pin the file encoding.
    with open(out_path, "w", encoding="utf-8") as f:
        for r in rows:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")
    print(f" → {len(rows)} turns written to {out_path}")
    return rows
324
+
325
+
326
def merge_supervision(craigslist, chicago, casino):
    """Merge all sources into a single supervision set for extractor training.

    Each output row is
    ``{"utterance": str, "source": str, "tell_supervision": dict}``.
    A row is kept only when its stripped utterance has at least 10
    characters and at least one of ``verbal_urgency``,
    ``verbal_confidence`` or ``verbal_deception_cue`` differs from the
    defaults (0.2, 0.5, 0.0).

    Args:
        craigslist: Rows returned by ``fetch_craigslist``.
        chicago: Rows returned by ``fetch_chicago_hai``.
        casino: Rows returned by ``fetch_casino``.

    Returns:
        The merged rows, which are also written to
        ``OUT / "extractor_supervision.jsonl"`` as UTF-8 JSONL.
    """
    verbal_defaults = (
        ("verbal_urgency", 0.2),
        ("verbal_confidence", 0.5),
        ("verbal_deception_cue", 0.0),
    )

    merged = []
    for r in craigslist + chicago + casino:
        # Tolerate rows whose utterance/tell is missing or None.
        utt = (r.get("utterance") or "").strip()
        tell = r.get("tell_supervision") or {}
        if len(utt) < 10:
            continue
        # Keep only rows where at least one verbal tell deviates from defaults
        if all(tell.get(key, default) == default for key, default in verbal_defaults):
            continue
        merged.append({
            "utterance": utt,
            "source": r.get("source", "unknown"),
            "tell_supervision": tell,
        })

    out_path = OUT / "extractor_supervision.jsonl"
    # ensure_ascii=False emits raw non-ASCII text, so pin the file encoding.
    with open(out_path, "w", encoding="utf-8") as f:
        for r in merged:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")
    print(f"\nMerged supervision set: {len(merged)} rows → {out_path}")
    return merged
354
+
355
+
356
if __name__ == "__main__":
    # Fetch each source in order (Craigslist, Chicago HAI, CaSiNo), then merge.
    per_source_rows = (
        fetch_craigslist(),
        fetch_chicago_hai(),
        fetch_casino(),
    )
    merge_supervision(*per_source_rows)
    print("\nDone. Run nlp/extractor.py to test extraction against these.")
nlp/keyword_patterns.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Keyword/phrase patterns for inline span-level tell highlighting.
2
+
3
+ Used by the /highlight endpoint to show users which exact phrases in their
4
+ message triggered which tell signal — Grammarly-style underlining in the
5
+ chat bubble.
6
+
7
+ Patterns mined from data/indian_negotiations.jsonl seller turns by strategy.
8
+ Hand-curated and grouped by tell signal:
9
+
10
+ urgency: "kal se", "abhi", "jaldi", "today only", "final price"
11
+ deception: "teen aur log dekh rahe", "other buyers looking", "best price"
12
+ confidence: "market rate", "mushkil hai", "isse kam nahi"
13
+ condition: "box pack", "scratch", "battery 81%", "abhi naya"
14
+
15
+ Each pattern has:
16
+ - regex (case-insensitive, word-bounded where useful)
17
+ - signal it triggers (urgency / deception / confidence / condition)
18
+ - score it adds to that signal (0-1)
19
+ - one-line explanation shown in the hover card
20
+
21
+ The frontend uses these to wrap matched spans in <mark> tags.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import re
27
+ from typing import Literal, NamedTuple
28
+
29
+ Signal = Literal["urgency", "deception", "confidence", "condition"]
30
+
31
+
32
class Pattern(NamedTuple):
    """One highlight rule: a compiled regex and the tell signal it raises."""

    # Compiled regex searched for in the message.
    pattern: re.Pattern[str]
    # Tell signal this rule contributes to.
    signal: Signal
    # Score attached to the signal when the rule matches.
    score: float
    # Human-readable label for the hover card.
    explanation: str
38
+
39
+
40
def _p(regex: str, signal: Signal, score: float, explanation: str) -> Pattern:
    """Compile ``regex`` case-insensitively and wrap it in a ``Pattern``."""
    compiled = re.compile(regex, flags=re.IGNORECASE)
    return Pattern(
        pattern=compiled,
        signal=signal,
        score=score,
        explanation=explanation,
    )
42
+
43
+
44
# Ordered rule table consumed by find_matches(). Rules are independent:
# several may match the same or overlapping text.
PATTERNS: list[Pattern] = [
    # ── URGENCY ──────────────────────────────────────────────────
    _p(r"\bkal\s+se\b", "urgency", 0.6, "Time pressure: 'price changes tomorrow'"),
    _p(r"\babhi\b(?!\s+nahi)", "urgency", 0.4, "Hindi 'right now' — pushes immediate decision"),
    _p(r"\bjaldi\b", "urgency", 0.6, "Hindi 'quickly' — explicit urgency"),
    _p(r"\btoday\s+only\b", "urgency", 0.7, "Time pressure: limited window"),
    _p(r"\bfinal\s+price\b", "urgency", 0.5, "Anchoring: 'this is final, no negotiation'"),
    _p(r"\blast\s+price\b", "urgency", 0.5, "Anchoring: claims this is the bottom"),
    _p(r"\bfix(?:ed)?\s+(?:hai|price)\b", "urgency", 0.4, "Position commitment: 'price is fixed'"),
    _p(r"\bno\s+(?:more\s+)?negotiation\b", "urgency", 0.7, "Closes the door on further bargaining"),
    _p(r"\btime\s+waste\b", "urgency", 0.5, "Impatience signal"),
    _p(r"\bimmediately\b", "urgency", 0.4, "Demands same-instant action"),
    _p(r"\bsend\s+(?:the\s+)?money\b", "urgency", 0.5, "Pushing toward immediate transaction"),
    _p(r"\b(?:i'?m|im|i am)\s+(?:making\s+a\s+)?los(?:s|ing)\b", "urgency", 0.55,
       "Loss-claim sympathy push — pressures buyer to feel bad about price"),
    _p(r"\bnot?\s+making\s+(?:any\s+)?(?:profit|money)\b", "urgency", 0.5, "No-profit sympathy push"),
    _p(r"\bbarely\s+breaking\s+even\b", "urgency", 0.5, "Sympathy push: claims zero margin"),

    # ── DECEPTION ────────────────────────────────────────────────
    # The classic: "teen aur log dekh rahe" (three other people are looking)
    _p(r"\bteen\s+aur\s+log\b", "deception", 0.8,
       "External-incentive bluff: claims multiple competing buyers (CaSiNo deception cue)"),
    _p(r"\bother\s+(?:people|buyers?)\s+(?:are\s+)?looking\b", "deception", 0.8,
       "External-incentive bluff: claims competing buyers"),
    _p(r"\bothers\s+are\s+looking\b", "deception", 0.8, "External-incentive bluff"),
    _p(r"\bkoi\s+aur\s+(?:buyer|log)\b", "deception", 0.7, "Claims another buyer is interested"),
    _p(r"\baur\s+log\s+(?:bhi\s+)?dekh\b", "deception", 0.7, "Claims more people watching"),
    _p(r"\bdemand\s+(?:zyada|high)\b", "deception", 0.4, "Claims market demand to justify price"),
    _p(r"\bmarket\s+(?:mein\s+)?(?:bahut\s+)?demand\b", "deception", 0.4, "Claims market demand"),
    _p(r"\bbest\s+price\b", "deception", 0.3, "Self-praise — soft anchoring"),
    # Numeric "3 other offers" / "two more buyers" — same external-incentive bluff
    # as "teen aur log" but in English with digits or number-words.
    _p(r"\b(?:\d+|two|three|four|five|several|multiple|many)\s+(?:other\s+|more\s+)?(?:offers?|buyers?|people|interested)\b",
       "deception", 0.75, "External-incentive bluff: claims competing offers/buyers"),
    _p(r"\bgot\s+(?:\d+|two|three|four|five|several|multiple|other)\s+(?:offers?|buyers?)\b",
       "deception", 0.75, "Claims existing competing offers"),
    _p(r"\b(?:already\s+)?have\s+(?:\d+|two|three|four|other)\s+(?:offers?|buyers?)\b",
       "deception", 0.75, "Claims existing competing offers"),

    # ── CONFIDENCE ───────────────────────────────────────────────
    _p(r"\bmarket\s+rate\b", "confidence", 0.6, "Confidence: anchoring to external price reference"),
    _p(r"\bmarket\s+mein\s+iski\b", "confidence", 0.5, "Confidence: market positioning"),
    _p(r"\bnahi\s+ho(?:\s+payega)?\b", "confidence", 0.6, "Firm refusal: 'won't happen'"),
    _p(r"\bmushkil\s+hai\b", "confidence", 0.4, "Mild firmness: 'difficult'"),
    _p(r"\bisse\s+(?:upar|kam)\s+nahi\b", "confidence", 0.7, "Hard floor/ceiling commitment"),
    _p(r"\bnot?\s+(?:lower|higher)\b", "confidence", 0.6, "Position commitment"),

    # ── CONDITION ────────────────────────────────────────────────
    _p(r"\bbox\s+(?:band|pack|sealed?)\b", "condition", 0.95, "Item is sealed / new in box"),
    _p(r"\bseal\s+packed?\b", "condition", 0.95, "New, factory-sealed"),
    _p(r"\babhi\s+box\s+se\s+nikala\b", "condition", 0.9, "Just unboxed — like new"),
    _p(r"\b(?:bilkul\s+)?naya\b", "condition", 0.85, "Hindi 'brand new'"),
    _p(r"\b(?:like\s+new|mint)\b", "condition", 0.85, "Like-new condition"),
    _p(r"\bbarely\s+used\b", "condition", 0.8, "Lightly used"),
    _p(r"\bek\s+(?:chhota\s+)?scratch\b", "condition", 0.55, "Minor scratch — visible wear"),
    _p(r"\b(?:minor\s+)?scratch(?:es)?\b", "condition", 0.55, "Minor cosmetic damage"),
    _p(r"\bdent\b", "condition", 0.5, "Dent — moderate wear"),
    _p(r"\bchip(?:ped)?\b", "condition", 0.5, "Chipped — visible damage"),
    _p(r"\bscreen\s+(?:replaced|change)\b", "condition", 0.35,
       "Screen replacement — depreciation indicator"),
    # The word boundary sits right after the digits and the percent sign is
    # an optional suffix. With the old `\s*%?\b` tail, a "%" followed by a
    # space or end of text could never satisfy `\b`, so the highlighted span
    # stopped at the digits and left the "%" out.
    _p(r"\bbattery\s+(?:health\s+)?(\d{2,3})\b(?:\s*%)?", "condition", 0.4,
       "Battery health disclosure — wear indicator"),
    _p(r"\b(\d{1,2})\s*(?:saal|year)s?\s+(?:purana|old)\b", "condition", 0.5,
       "Age disclosure"),
    _p(r"\bkabhi\s+giraya\s+nahi\b", "condition", 0.85, "Never dropped — careful owner"),
    _p(r"\boriginal\s+(?:box|charger|warranty)\b", "condition", 0.75,
       "Has original accessories"),
    _p(r"\bwarranty\b", "condition", 0.7, "Has warranty"),
    _p(r"\bperfect\s+condition\b", "condition", 0.85, "Perfect condition claim"),
    _p(r"\bworking\s+condition\b", "condition", 0.7, "Functional but unspecified wear"),
]
115
+
116
+
117
class Match(NamedTuple):
    """A single rule hit inside a message, located by character offsets."""

    # Offset of the first matched character.
    start: int
    # Offset one past the last matched character.
    end: int
    # The exact substring that matched.
    text: str
    # Tell signal of the rule that matched.
    signal: Signal
    # Score of the rule that matched.
    score: float
    # Hover-card explanation of the rule that matched.
    explanation: str
124
+
125
+
126
def find_matches(message: str) -> list[Match]:
    """Return every hit of every rule in ``PATTERNS`` as char-offset spans.

    Hits are grouped by rule in ``PATTERNS`` order, and within a rule in
    left-to-right order of occurrence in ``message``.
    """
    return [
        Match(
            start=hit.start(),
            end=hit.end(),
            text=hit.group(0),
            signal=rule.signal,
            score=rule.score,
            explanation=rule.explanation,
        )
        for rule in PATTERNS
        for hit in rule.pattern.finditer(message)
    ]
142
+
143
+
144
def aggregate_signals(matches: list[Match]) -> dict[str, float]:
    """Collapse matches to one score per signal, keeping the highest seen."""
    best: dict[str, float] = {}
    for hit in matches:
        current = best.get(hit.signal, 0.0)
        # Same tie/ordering rule as max(current, hit.score).
        best[hit.signal] = hit.score if hit.score > current else current
    return best
nlp/setup_ministral.sh ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Waits for the Indian negotiation generator to finish, then:
#   1. Pulls ministral-3:3b into Ollama
#   2. Lists the installed Ollama models so the pull can be confirmed
#   3. Runs the extractor test to verify quality
#
# NOTE: this script does not itself change which model nlp/extractor.py
# uses by default; it only pulls the model and runs the extractor's
# standalone test.
#
# Run with: bash nlp/setup_ministral.sh &
# Log: /tmp/setup_ministral.log
#
# Environment overrides:
#   PROJECT_ROOT  repository root to run from (default: parent of this
#                 script's directory)

set -euo pipefail
LOG=/tmp/setup_ministral.log
TARGET=500
JSONL=data/indian_negotiations.jsonl
EXTRACTOR=nlp/extractor.py
MODEL=ministral-3:3b
POLL_SECONDS=120

# Timestamped message to stdout and appended to $LOG.
log() { echo "[$(date '+%H:%M:%S')] $*" | tee -a "$LOG"; }

# Resolve the repo root from the script location instead of a hard-coded
# home directory, so the relative paths above work on any checkout.
PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
cd "$PROJECT_ROOT"

log "Watching generator — waiting for $TARGET conversations in $JSONL ..."

while true; do
    if [ -f "$JSONL" ]; then
        count=$(wc -l < "$JSONL")
        log "Progress: $count / $TARGET conversations"
        if [ "$count" -ge "$TARGET" ]; then
            log "Generator done."
            break
        fi

        # Also stop waiting if the generator process is gone and file exists
        if ! pgrep -f generate_indian_negotiations.py > /dev/null 2>&1; then
            log "Generator process ended with $count conversations. Proceeding."
            break
        fi
    else
        log "Output file not found yet, waiting..."
    fi

    sleep "$POLL_SECONDS"
done

log "Pulling $MODEL ..."
ollama pull "$MODEL" 2>&1 | tee -a "$LOG"

log "Verifying $MODEL is available ..."
ollama list | tee -a "$LOG"

log "Running extractor test with $MODEL ..."
PYTHONPATH=. .venv/bin/python "$EXTRACTOR" 2>&1 | tee -a "$LOG"

log "All done. Check $LOG for extractor quality results."
nlp/templates.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Buyer-message templates for SFT targets and steerer-override fallback.
2
+
3
+ Templates are bucketed by:
4
+ action_kind ∈ {offer_low, offer_mid, offer_high, accept, walk}
5
+ register ∈ {firm, soft, polite, curt, final} β€” escalating tone
6
+
7
+ Use the `turn_index` in render() to bias toward `final` register on later turns
8
+ (round-aware escalation: opening turns sound exploratory, late turns sound terminal).
9
+
10
+ The bank avoids "yaar" (informal/casual filler) and over-uses of "bhai" β€” keeps the
11
+ buyer's voice grounded in Hinglish-leaning English without sounding like a street vendor.
12
+ """
13
+
14
+ import random
15
+ from typing import Optional
16
+
17
+ # Each entry: (register, template). All offer templates have {price}.
18
+ TEMPLATES: dict[str, list[tuple[str, str]]] = {
19
+ "offer_low": [
20
+ ("firm", "{price} max de sakta hu, isse upar nahi"),
21
+ ("firm", "{price} mera offer, isse zyada nahi"),
22
+ ("firm", "{price} pe baat banegi, warna nahi"),
23
+ ("firm", "{price} hai bas, final"),
24
+ ("soft", "{price} mein de dijiye please"),
25
+ ("soft", "{price} chalega bhai?"),
26
+ ("soft", "{price} kar lo, deal pakki"),
27
+ ("polite", "market mein {price} mein mil jaata hai, dekh lijiye"),
28
+ ("polite", "honestly bhai, {price} fair lagta hai mujhe"),
29
+ ("polite", "{price} reasonable hai, condition dekh ke"),
30
+ ("curt", "{price}. le ya jaa."),
31
+ ("curt", "{price}, last from my side"),
32
+ ("final", "okay, {price} mera final offer hai"),
33
+ ("final", "{price} ya nahi β€” beyond this I walk"),
34
+ ],
35
+ "offer_mid": [
36
+ ("firm", "{price} pe karte hain deal"),
37
+ ("firm", "{price} works for me, lock kar do"),
38
+ ("firm", "chalo, {price} pe baat khatam"),
39
+ ("soft", "{price} chalega bhai?"),
40
+ ("soft", "{price} mein ho jaye?"),
41
+ ("soft", "thoda kam karo, {price} pe finalize?"),
42
+ ("polite", "{price} fair hai dono ke liye"),
43
+ ("polite", "{price} sahi rate lagta hai mujhe"),
44
+ ("curt", "{price}. that's where I am"),
45
+ ("curt", "{price}, isse upar nahi"),
46
+ ("final", "{price} ya I'm out"),
47
+ ("final", "this is my last move β€” {price}"),
48
+ ],
49
+ "offer_high": [
50
+ ("firm", "okay, {price} but that's the limit"),
51
+ ("firm", "{price}, isse upar nahi ja sakta"),
52
+ ("soft", "{price} okay? close kar dete hain"),
53
+ ("soft", "fine, {price} mein le leta hu"),
54
+ ("polite", "{price} stretch kar raha hu, condition fair lagi"),
55
+ ("polite", "{price} de raha hu since you've been reasonable"),
56
+ ("curt", "{price}. done?"),
57
+ ("curt", "{price}, last bid"),
58
+ ("final", "okay {price} β€” bas yahi ceiling hai"),
59
+ ("final", "{price} pe close ya I walk"),
60
+ ],
61
+ "accept": [
62
+ ("firm", "deal."),
63
+ ("firm", "done."),
64
+ ("firm", "chalo, deal."),
65
+ ("soft", "okay, le leta hu"),
66
+ ("soft", "theek hai, kar lete hain"),
67
+ ("polite", "fair, accepted"),
68
+ ("polite", "sounds good, deal pakki"),
69
+ ("curt", "haan."),
70
+ ("curt", "ho gaya, done"),
71
+ ("final", "deal, close kar dete hain"),
72
+ ("final", "okay, isi pe lock"),
73
+ ],
74
+ "walk": [
75
+ ("firm", "nahi yaar nahi, ye nahi ho payega"),
76
+ ("firm", "budget mein nahi aa raha, passing"),
77
+ ("soft", "thanks for your time, dekhte hain phir kabhi"),
78
+ ("soft", "appreciate it, but is price pe nahi"),
79
+ ("polite", "gap zyada hai, mujhe pass karna hoga"),
80
+ ("polite", "respect your floor, but mere liye nahi banega"),
81
+ ("curt", "no deal."),
82
+ ("curt", "passing, thanks"),
83
+ ("final", "bahut difference hai β€” walking"),
84
+ ("final", "is price pe nahi, goodbye"),
85
+ ],
86
+ }
87
+
88
+
89
def _bucket_for_offer(price: float, ask: float) -> str:
    """Map an offer to 'offer_low' / 'offer_mid' / 'offer_high' by price/ask.

    A non-positive ask cannot be used as a denominator and falls back to
    'offer_mid'.
    """
    if ask <= 0:
        return "offer_mid"

    fraction_of_ask = price / ask
    # Exclusive upper bounds, checked in ascending order.
    for upper_bound, bucket in ((0.55, "offer_low"), (0.80, "offer_mid")):
        if fraction_of_ask < upper_bound:
            return bucket
    return "offer_high"
99
+
100
+
101
def _register_for_turn(
    turn_index: int,
    max_turns: int = 8,
    rng: Optional[random.Random] = None,
) -> Optional[str]:
    """Pick a tone register from how far into the episode ``turn_index`` is.

    ``progress = turn_index / max(1, max_turns)`` selects the pool:

    - progress < 0.25 (turns 0-1 of 8): polite or soft
    - progress < 0.65 (turns 2-5 of 8): firm or soft
    - otherwise       (turns 6+ of 8):  curt or final

    Args:
        turn_index: Current round number; negative values yield ``None``.
        max_turns: Episode length used to normalize ``turn_index``.
        rng: Source of randomness. Defaults to the global ``random`` module,
            which is what this helper always used before the parameter
            existed.

    Returns:
        One register name, or ``None`` when ``turn_index`` is negative.
    """
    if turn_index < 0:
        return None
    chooser = rng if rng is not None else random
    progress = turn_index / max(1, max_turns)
    if progress < 0.25:
        return chooser.choice(["polite", "soft"])
    if progress < 0.65:
        return chooser.choice(["firm", "soft"])
    return chooser.choice(["curt", "final"])


def render(
    action: str,
    price: Optional[float],
    ask: Optional[float] = None,
    intent: Optional[str] = None,
    turn_index: Optional[int] = None,
    max_turns: int = 8,
    used_history: Optional[set[str]] = None,
    rng: Optional[random.Random] = None,
) -> str:
    """Pick a template, render it with the given price, avoid recent repeats.

    Args:
        action: 'offer' | 'accept' | 'walk'
        price: numeric price for offers; None for accept/walk
        ask: seller's current ask (used to bucket offer price)
        intent: explicit register override ('firm'|'soft'|'polite'|'curt'|'final')
        turn_index: current round number — biases register toward 'final' as it grows
        max_turns: typical episode length used for normalizing turn_index
        used_history: set of messages already rendered this episode (avoid repeats)
        rng: optional Random instance for reproducibility; it now drives the
            turn-based register choice as well as the template choice

    Returns:
        A natural-language line, with {price} slot filled. Returns "" for an
        unknown action or an empty template bucket.
    """
    rng = rng or random
    if action == "offer":
        # With no ask, the price is compared against itself (ratio 1.0).
        bucket = _bucket_for_offer(price or 0, ask or (price or 0))
    elif action == "accept":
        bucket = "accept"
    elif action == "walk":
        bucket = "walk"
    else:
        return ""

    candidates = TEMPLATES.get(bucket, [])
    if not candidates:
        return ""

    # Determine register: explicit > turn-based > random.
    # The caller's rng is passed through so a seeded render() is fully
    # reproducible; previously the register came from the global generator.
    register = intent or (
        _register_for_turn(turn_index, max_turns, rng) if turn_index is not None else None
    )

    register_pool = [(r, t) for r, t in candidates if r == register] if register else list(candidates)
    if not register_pool:
        # Unknown or unrepresented register: fall back to every template.
        register_pool = list(candidates)

    def _materialize(tmpl: str) -> str:
        """Fill the {price} slot (rounded to an int) when a price is given."""
        if "{price}" in tmpl and price is not None:
            return tmpl.format(price=int(round(price)))
        return tmpl

    # `used_history` stores rendered messages, so compare against the materialized form.
    # Variety > register fidelity when buyer is stuck — widen to all registers
    # before allowing repeats.
    if used_history:
        fresh_in_register = [(r, t) for r, t in register_pool if _materialize(t) not in used_history]
        if fresh_in_register:
            pool = fresh_in_register
        else:
            fresh_anywhere = [(r, t) for r, t in candidates if _materialize(t) not in used_history]
            pool = fresh_anywhere or register_pool
    else:
        pool = register_pool

    _, tmpl = rng.choice(pool)
    return _materialize(tmpl)
server/main.py CHANGED
@@ -4,10 +4,11 @@ from __future__ import annotations
4
 
5
  import copy
6
  import json
 
7
  from contextlib import asynccontextmanager
8
  from typing import Optional
9
 
10
- from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
11
  from fastapi.middleware.cors import CORSMiddleware
12
  from fastapi.responses import HTMLResponse
13
  from pydantic import BaseModel
@@ -91,6 +92,20 @@ class ArenaStepRequest(BaseModel):
91
 
92
  # ── App state ─────────────────────────────────────────────────────
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  _envs: dict[str, BazaarEnvironment] = {}
95
  _arenas: dict[str, MultiBuyerArena] = {}
96
  _ws_connections: dict[str, list[WebSocket]] = {}
@@ -337,6 +352,90 @@ async def health():
337
  return {"status": "ok", "version": "2.0.0"}
338
 
339
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  # ── Simulate (AI auto-play for spectator mode) ──────────────────
341
 
342
  class SimulateRequest(BaseModel):
@@ -356,8 +455,50 @@ class SellerModeStepRequest(BaseModel):
356
  price: float
357
 
358
 
359
- def _ai_buyer_action(obs: BazaarObservation, strategy: str, rng) -> BazaarAction:
360
- """Built-in AI buyer strategies for spectator / seller mode."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  budget = obs.own_private_budget
362
  ask = obs.seller_asking_price
363
  opp = obs.opponent_last_offer or ask
@@ -545,15 +686,33 @@ class SellerModeResetRequest(BaseModel):
545
  strategy: str = "smart"
546
  seed: Optional[int] = None
547
  opening_price: float = 60.0
 
 
548
 
549
 
550
  @app.post("/seller-mode/reset")
551
- async def seller_mode_reset(req: SellerModeResetRequest):
552
  """Start a seller-mode session. User plays as seller, AI plays as buyer."""
553
  if req.task not in TASKS:
554
  raise HTTPException(status_code=400, detail=f"Unknown task: {req.task}")
555
 
556
  task = copy.deepcopy(TASKS[req.task])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
  # Store seller mode state
558
  import random
559
  session = {
@@ -562,8 +721,8 @@ async def seller_mode_reset(req: SellerModeResetRequest):
562
  "rng": random.Random(req.seed),
563
  "round": 0,
564
  "max_rounds": task.max_steps if task.total_episodes == 1 else task.max_steps // task.total_episodes,
565
- "buyer_budget": task.buyer_budget,
566
- "seller_cost": task.seller_cost,
567
  "current_seller_price": req.opening_price,
568
  "last_buyer_offer": None,
569
  "history": [],
@@ -577,17 +736,21 @@ async def seller_mode_reset(req: SellerModeResetRequest):
577
  current_round=0,
578
  max_rounds=session["max_rounds"],
579
  opponent_last_offer=req.opening_price,
580
- own_private_budget=task.buyer_budget,
581
  rounds_remaining=session["max_rounds"],
582
  seller_asking_price=req.opening_price,
583
- item_name="handwoven silk scarf",
584
  message=f"You open at {req.opening_price:.0f} rupees.",
585
  )
586
 
587
  # AI buyer makes first offer
588
- action = _ai_buyer_action(obs, req.strategy, session["rng"])
 
589
  session["round"] = 1
590
  session["last_buyer_offer"] = action.price
 
 
 
591
  session["history"].append({
592
  "round": 0,
593
  "actor": "seller",
@@ -599,9 +762,10 @@ async def seller_mode_reset(req: SellerModeResetRequest):
599
  "actor": "buyer",
600
  "action": action.action.value if hasattr(action.action, 'value') else action.action,
601
  "price": action.price,
 
602
  })
603
 
604
- buyer_msg = (
605
  f"Buyer offers {action.price:.0f} rupees."
606
  if action.action in ("offer", "OFFER", ActionType.OFFER)
607
  else f"Buyer {action.action}s."
@@ -611,7 +775,8 @@ async def seller_mode_reset(req: SellerModeResetRequest):
611
  "round": 1,
612
  "buyer_action": action.action.value if hasattr(action.action, 'value') else action.action,
613
  "buyer_price": action.price,
614
- "message": buyer_msg,
 
615
  "your_opening": req.opening_price,
616
  "history": session["history"],
617
  "done": False,
@@ -619,7 +784,7 @@ async def seller_mode_reset(req: SellerModeResetRequest):
619
 
620
 
621
  @app.post("/seller-mode/step")
622
- async def seller_mode_step(req: SellerModeStepRequest):
623
  """User (as seller) sets counteroffer price. AI buyer responds."""
624
  if "seller_mode" not in _envs:
625
  raise HTTPException(status_code=400, detail="No seller-mode session. Call /seller-mode/reset first.")
@@ -690,7 +855,8 @@ async def seller_mode_step(req: SellerModeStepRequest):
690
  }
691
 
692
  # AI buyer responds
693
- action = _ai_buyer_action(obs, session["strategy"], session["rng"])
 
694
 
695
  if action.action in ("accept", ActionType.ACCEPT):
696
  session["done"] = True
@@ -700,16 +866,20 @@ async def seller_mode_step(req: SellerModeStepRequest):
700
  max_surplus = session["buyer_budget"] - session["seller_cost"]
701
  buyer_score = max(0, surplus / max_surplus) if max_surplus > 0 else 0
702
 
 
 
703
  session["history"].append({
704
  "round": rnd,
705
  "actor": "buyer",
706
  "action": "accept",
707
  "price": seller_price,
 
708
  })
709
 
710
  return {
711
  "round": rnd,
712
- "message": f"Buyer accepts your price of {seller_price:.0f}! Deal closed.",
 
713
  "buyer_action": "accept",
714
  "buyer_price": seller_price,
715
  "done": True,
@@ -723,17 +893,21 @@ async def seller_mode_step(req: SellerModeStepRequest):
723
  elif action.action in ("walk", ActionType.WALK):
724
  session["done"] = True
725
  session["outcome"] = "walk"
 
 
726
 
727
  session["history"].append({
728
  "round": rnd,
729
  "actor": "buyer",
730
  "action": "walk",
731
  "price": None,
 
732
  })
733
 
734
  return {
735
  "round": rnd,
736
- "message": "Buyer walks away! No deal.",
 
737
  "buyer_action": "walk",
738
  "buyer_price": None,
739
  "done": True,
@@ -743,16 +917,21 @@ async def seller_mode_step(req: SellerModeStepRequest):
743
 
744
  else: # offer
745
  session["last_buyer_offer"] = action.price
 
 
 
746
  session["history"].append({
747
  "round": rnd,
748
  "actor": "buyer",
749
  "action": "offer",
750
  "price": action.price,
 
751
  })
752
 
753
  return {
754
  "round": rnd,
755
- "message": f"Buyer counters with {action.price:.0f} rupees.",
 
756
  "buyer_action": "offer",
757
  "buyer_price": action.price,
758
  "done": False,
 
4
 
5
  import copy
6
  import json
7
+ import os
8
  from contextlib import asynccontextmanager
9
  from typing import Optional
10
 
11
+ from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
12
  from fastapi.middleware.cors import CORSMiddleware
13
  from fastapi.responses import HTMLResponse
14
  from pydantic import BaseModel
 
92
 
93
  # ── App state ─────────────────────────────────────────────────────
94
 
95
def _client_ip(request: Request) -> Optional[str]:
    """Return a best-effort client IP for rate-limiting purposes.

    The leftmost ``X-Forwarded-For`` entry wins when the header is present
    and non-empty; otherwise the direct socket peer is used. ``None`` is
    returned when neither source yields an address.

    Note: ``X-Forwarded-For`` can be spoofed in untrusted environments. The
    current hosting plan is direct uvicorn or a single-hop reverse proxy we
    control, so trusting the leftmost entry is considered acceptable.
    """
    forwarded = request.headers.get("x-forwarded-for")
    if not forwarded:
        peer = request.client
        if peer:
            return peer.host
        return None

    # Only the first hop is of interest; everything after the first comma
    # was appended by intermediaries.
    leftmost, _, _ = forwarded.partition(",")
    leftmost = leftmost.strip()
    return leftmost if leftmost else None
107
+
108
+
109
  _envs: dict[str, BazaarEnvironment] = {}
110
  _arenas: dict[str, MultiBuyerArena] = {}
111
  _ws_connections: dict[str, list[WebSocket]] = {}
 
352
  return {"status": "ok", "version": "2.0.0"}
353
 
354
 
355
+ # ── Highlight: span-level tell extraction for the /sell page ────
356
+
357
class HighlightRequest(BaseModel):
    # Request body for POST /highlight.
    # `message` is the seller's chat text that will be scanned for tells.
    message: str
359
+
360
+
361
class HighlightSpan(BaseModel):
    # One matched phrase, copied field-for-field from a match object returned
    # by nlp.keyword_patterns.find_matches.
    # NOTE(review): start/end are presumably character offsets into the
    # request message — confirm against nlp.keyword_patterns.
    start: int
    end: int
    text: str
    signal: str
    score: float
    explanation: str
368
+
369
+
370
class HighlightResponse(BaseModel):
    # Response body for POST /highlight.
    # `spans` lists every match; `aggregate` is whatever
    # nlp.keyword_patterns.aggregate_signals returns for those matches.
    spans: list[HighlightSpan]
    aggregate: dict[str, float]
373
+
374
+
375
@app.post("/highlight", response_model=HighlightResponse)
async def highlight(req: HighlightRequest):
    """Locate tell-triggering phrases in a seller message and return spans.

    The /sell page uses the result to underline urgency/deception/condition
    phrases in the user's chat bubble after sending. Matching is delegated
    to ``nlp.keyword_patterns``; no LLM call is made here.
    """
    from nlp.keyword_patterns import aggregate_signals, find_matches

    found = find_matches(req.message)

    # Convert each match object into its response model, field by field.
    span_models = []
    for hit in found:
        span_models.append(
            HighlightSpan(
                start=hit.start,
                end=hit.end,
                text=hit.text,
                signal=hit.signal,
                score=hit.score,
                explanation=hit.explanation,
            )
        )

    return HighlightResponse(spans=span_models, aggregate=aggregate_signals(found))
396
+
397
+
398
@app.get("/sauda/health")
async def sauda_health(request: Request):
    """Probe both buyer backends and report whether a live agent is reachable.

    The public response is intentionally minimal: just a green/red signal.
    A caller whose ``X-Sauda-Admin`` header equals the ``SAUDA_ADMIN_TOKEN``
    environment variable receives the full dict produced by
    ``sauda_buyer.health()`` instead (the ops view).

    Returns:
        The full health dict for an authenticated admin; otherwise a dict
        with ``status`` ("ok" or "degraded") and ``live_agent_available``.
    """
    import hmac

    from .sauda_buyer import health as _full_health

    full = _full_health()
    admin_token = os.environ.get("SAUDA_ADMIN_TOKEN", "").strip()
    supplied = request.headers.get("x-sauda-admin", "")

    # Constant-time comparison so response timing does not leak how much of
    # the token matched. Compare bytes: compare_digest rejects non-ASCII str.
    # An unset/empty SAUDA_ADMIN_TOKEN never grants admin access.
    is_admin = bool(admin_token) and hmac.compare_digest(
        supplied.encode("utf-8"), admin_token.encode("utf-8")
    )
    if is_admin:
        return full

    # Public view: only what a UI needs to decide whether the live agent is
    # reachable. No spend numbers, no IP counts, no circuit-breaker state.
    live = bool(full.get("hf_ok") or full.get("ollama_ok"))
    return {
        "status": "ok" if live else "degraded",
        "live_agent_available": live,
    }
418
+
419
+
420
@app.get("/sauda/backends")
async def sauda_backends():
    """Return static metadata about buyer backends for the /sell UI dropdown."""
    # (id, label, primary, description) — listed in display order.
    catalogue = (
        ("sauda", "Sauda v2 (HF Endpoint)", True,
         "Llama-3.1-8B + SFT+GRPO LoRA, served via HF Inference Endpoint."),
        ("sauda_ollama", "Sauda v2 (Ollama, local)", False,
         "Same adapter, served locally via Ollama. Fallback when HF endpoint is unavailable."),
        ("smart", "Rule-based (smart)", False,
         "Heuristic baseline. No LLM. Always available."),
        ("naive", "Rule-based (naive)", False,
         "Easy buyer for seller-mode warmup."),
        ("aggressive", "Rule-based (aggressive)", False,
         "Hard rule-based buyer."),
    )
    return {
        "backends": [
            {"id": backend_id, "label": label, "primary": primary, "description": description}
            for backend_id, label, primary, description in catalogue
        ]
    }
437
+
438
+
439
  # ── Simulate (AI auto-play for spectator mode) ──────────────────
440
 
441
  class SimulateRequest(BaseModel):
 
455
  price: float
456
 
457
 
458
+ def _ai_buyer_action(
459
+ obs: BazaarObservation,
460
+ strategy: str,
461
+ rng,
462
+ *,
463
+ client_ip: Optional[str] = None,
464
+ ) -> BazaarAction:
465
+ """Built-in AI buyer strategies for spectator / seller mode.
466
+
467
+ `strategy` values:
468
+ - "sauda" / "sauda_hf" → HF Inference Endpoint serving Sauda v2
469
+ - "sauda_ollama" → local ollama serving Sauda v2
470
+ - "smart" / "naive" / "aggressive" → rule-based heuristics (no LLM)
471
+
472
+ `client_ip` is forwarded to the safety layer for per-IP rate-limiting on
473
+ the metered HF backend; pass None for trusted server-internal callers.
474
+ """
475
+ # Live Sauda v2 path (HF endpoint primary, Ollama fallback selectable)
476
+ if strategy in ("sauda", "sauda_hf", "sauda_ollama"):
477
+ from .sauda_buyer import sauda_action
478
+ backend = "ollama" if strategy == "sauda_ollama" else "hf"
479
+ obs_dict = obs.model_dump() if hasattr(obs, "model_dump") else obs.dict()
480
+ result = sauda_action(obs_dict, backend=backend, client_ip=client_ip)
481
+ action_str = result.get("action", "offer")
482
+ price = result.get("price")
483
+ msg = result.get("message", "")
484
+ if action_str == "accept":
485
+ ba = BazaarAction(action="accept")
486
+ elif action_str == "walk":
487
+ ba = BazaarAction(action="walk")
488
+ else:
489
+ ba = BazaarAction(action="offer", price=float(price) if price is not None else round((obs.own_private_budget or 100) * 0.3, 2))
490
+ # Smuggle the model's prose message + backend trace through a side channel
491
+ # (BazaarAction has no message field; the route handler reads .sauda_message
492
+ # off the action when present).
493
+ try:
494
+ object.__setattr__(ba, "sauda_message", msg)
495
+ object.__setattr__(ba, "sauda_backend", result.get("backend", backend))
496
+ if result.get("error"):
497
+ object.__setattr__(ba, "sauda_error", result["error"])
498
+ except Exception:
499
+ pass
500
+ return ba
501
+
502
  budget = obs.own_private_budget
503
  ask = obs.seller_asking_price
504
  opp = obs.opponent_last_offer or ask
 
686
  strategy: str = "smart"
687
  seed: Optional[int] = None
688
  opening_price: float = 60.0
689
+ item_name: Optional[str] = None
690
+ listing_price: Optional[float] = None # if user picked a real listing, this is its MRP
691
 
692
 
693
  @app.post("/seller-mode/reset")
694
+ async def seller_mode_reset(req: SellerModeResetRequest, request: Request):
695
  """Start a seller-mode session. User plays as seller, AI plays as buyer."""
696
  if req.task not in TASKS:
697
  raise HTTPException(status_code=400, detail=f"Unknown task: {req.task}")
698
 
699
  task = copy.deepcopy(TASKS[req.task])
700
+
701
+ # Tasks have hardcoded buyer_budget / seller_cost from synthetic examples.
702
+ # When the user opens at a real-listing price ($2695 for an iPhone, $399
703
+ # for a sofa, etc) those numbers become nonsense and Sauda offers $30 on
704
+ # a $2695 ask. Anchor the scale on the task's *opening price prior* —
705
+ # buyer_budget = 1.67×ask in single_deal (60 → 100), and the relative
706
+ # ratios (cost / budget ≈ 0.35, ask / budget ≈ 0.6) hold across tasks.
707
+ # Derive sane budget/cost from the user's actual opening_price using those
708
+ # ratios so the buyer's model of the deal scales with the listing.
709
+ if req.opening_price and req.opening_price > 0:
710
+ scaled_budget = float(req.opening_price) * 1.05 # buyer can stretch ~5% above ask
711
+ scaled_cost = float(req.opening_price) * 0.35 # seller's true cost ~35% of ask
712
+ else:
713
+ scaled_budget = task.buyer_budget
714
+ scaled_cost = task.seller_cost
715
+
716
  # Store seller mode state
717
  import random
718
  session = {
 
721
  "rng": random.Random(req.seed),
722
  "round": 0,
723
  "max_rounds": task.max_steps if task.total_episodes == 1 else task.max_steps // task.total_episodes,
724
+ "buyer_budget": scaled_budget,
725
+ "seller_cost": scaled_cost,
726
  "current_seller_price": req.opening_price,
727
  "last_buyer_offer": None,
728
  "history": [],
 
736
  current_round=0,
737
  max_rounds=session["max_rounds"],
738
  opponent_last_offer=req.opening_price,
739
+ own_private_budget=scaled_budget,
740
  rounds_remaining=session["max_rounds"],
741
  seller_asking_price=req.opening_price,
742
+ item_name=req.item_name or "handwoven silk scarf",
743
  message=f"You open at {req.opening_price:.0f} rupees.",
744
  )
745
 
746
  # AI buyer makes first offer
747
+ client_ip = _client_ip(request)
748
+ action = _ai_buyer_action(obs, req.strategy, session["rng"], client_ip=client_ip)
749
  session["round"] = 1
750
  session["last_buyer_offer"] = action.price
751
+ sauda_msg = getattr(action, "sauda_message", None) or ""
752
+ sauda_backend = getattr(action, "sauda_backend", None)
753
+ sauda_error = getattr(action, "sauda_error", None)
754
  session["history"].append({
755
  "round": 0,
756
  "actor": "seller",
 
762
  "actor": "buyer",
763
  "action": action.action.value if hasattr(action.action, 'value') else action.action,
764
  "price": action.price,
765
+ "message": sauda_msg,
766
  })
767
 
768
+ fallback_msg = (
769
  f"Buyer offers {action.price:.0f} rupees."
770
  if action.action in ("offer", "OFFER", ActionType.OFFER)
771
  else f"Buyer {action.action}s."
 
775
  "round": 1,
776
  "buyer_action": action.action.value if hasattr(action.action, 'value') else action.action,
777
  "buyer_price": action.price,
778
+ "message": sauda_msg or fallback_msg,
779
+ "buyer_message": sauda_msg,
780
  "your_opening": req.opening_price,
781
  "history": session["history"],
782
  "done": False,
 
784
 
785
 
786
  @app.post("/seller-mode/step")
787
+ async def seller_mode_step(req: SellerModeStepRequest, request: Request):
788
  """User (as seller) sets counteroffer price. AI buyer responds."""
789
  if "seller_mode" not in _envs:
790
  raise HTTPException(status_code=400, detail="No seller-mode session. Call /seller-mode/reset first.")
 
855
  }
856
 
857
  # AI buyer responds
858
+ client_ip = _client_ip(request)
859
+ action = _ai_buyer_action(obs, session["strategy"], session["rng"], client_ip=client_ip)
860
 
861
  if action.action in ("accept", ActionType.ACCEPT):
862
  session["done"] = True
 
866
  max_surplus = session["buyer_budget"] - session["seller_cost"]
867
  buyer_score = max(0, surplus / max_surplus) if max_surplus > 0 else 0
868
 
869
+ sauda_msg = getattr(action, "sauda_message", None) or ""
870
+ sauda_backend = getattr(action, "sauda_backend", None)
871
  session["history"].append({
872
  "round": rnd,
873
  "actor": "buyer",
874
  "action": "accept",
875
  "price": seller_price,
876
+ "message": sauda_msg,
877
  })
878
 
879
  return {
880
  "round": rnd,
881
+ "message": sauda_msg or f"Buyer accepts your price of {seller_price:.0f}! Deal closed.",
882
+ "buyer_message": sauda_msg,
883
  "buyer_action": "accept",
884
  "buyer_price": seller_price,
885
  "done": True,
 
893
  elif action.action in ("walk", ActionType.WALK):
894
  session["done"] = True
895
  session["outcome"] = "walk"
896
+ sauda_msg = getattr(action, "sauda_message", None) or ""
897
+ sauda_backend = getattr(action, "sauda_backend", None)
898
 
899
  session["history"].append({
900
  "round": rnd,
901
  "actor": "buyer",
902
  "action": "walk",
903
  "price": None,
904
+ "message": sauda_msg,
905
  })
906
 
907
  return {
908
  "round": rnd,
909
+ "message": sauda_msg or "Buyer walks away! No deal.",
910
+ "buyer_message": sauda_msg,
911
  "buyer_action": "walk",
912
  "buyer_price": None,
913
  "done": True,
 
917
 
918
  else: # offer
919
  session["last_buyer_offer"] = action.price
920
+ sauda_msg = getattr(action, "sauda_message", None) or ""
921
+ sauda_backend = getattr(action, "sauda_backend", None)
922
+ sauda_error = getattr(action, "sauda_error", None)
923
  session["history"].append({
924
  "round": rnd,
925
  "actor": "buyer",
926
  "action": "offer",
927
  "price": action.price,
928
+ "message": sauda_msg,
929
  })
930
 
931
  return {
932
  "round": rnd,
933
+ "message": sauda_msg or f"Buyer counters with {action.price:.0f} rupees.",
934
+ "buyer_message": sauda_msg,
935
  "buyer_action": "offer",
936
  "buyer_price": action.price,
937
  "done": False,
server/safety.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cost & abuse defenses for the live Sauda HF Inference Endpoint.
2
+
3
+ The /sell page is exposed to the public during the demo window. A bot loop on
4
+ /seller-mode/step would burn HF tokens unbounded. This module gates every HF
5
+ call behind:
6
+
7
+ 1. Hard daily call cap (HF only — ollama/rule are unmetered locally).
8
+ 2. Per-IP sliding-window rate limit.
9
+ 3. Global concurrent-in-flight cap.
10
+ 4. Circuit breaker: if HF errors N times in a row, lock to fallback for K min.
11
+ 5. Prompt-size cap (anti-prompt-injection ballooning).
12
+
13
+ When a gate trips, we silently downgrade to the next backend (ollama → rule).
14
+ We never tell the user "you've been rate limited" — the UI just sees a slightly
15
+ slower or simpler buyer. The internals are surfaced via /sauda/health for ops.
16
+
17
+ Counters persist to disk (`runs/safety_state.json`) so a restart doesn't reset
18
+ the daily cap and let an attacker get a fresh budget.
19
+
20
+ All gates default to permissive numbers tuned for "live demo, ~50 humans poking
21
+ at it for an hour"; tighten via env-vars for production.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import json
27
+ import os
28
+ import threading
29
+ import time
30
+ from collections import deque
31
+ from pathlib import Path
32
+ from typing import Any, Optional
33
+
34
+ # ── Tunables ──────────────────────────────────────────────────────────
35
+
36
+ # Hard cap on total HF calls per UTC day. Trip → flip to ollama for the rest of
37
+ # the day. Default 1500 ≈ ~$3-5 of a10g-small at typical token sizes.
38
+ MAX_HF_CALLS_PER_DAY = int(os.environ.get("SAUDA_HF_MAX_CALLS_PER_DAY", "1500"))
39
+
40
+ # Per-IP sliding-window. (window_seconds, max_calls) tuples.
41
+ IP_LIMITS: list[tuple[int, int]] = [
42
+ (60, int(os.environ.get("SAUDA_RL_PER_MIN", "30"))),
43
+ (3600, int(os.environ.get("SAUDA_RL_PER_HOUR", "200"))),
44
+ (86400, int(os.environ.get("SAUDA_RL_PER_DAY", "500"))),
45
+ ]
46
+
47
+ # Max concurrent in-flight HF calls. Excess gets ollama immediately.
48
+ MAX_CONCURRENT_HF = int(os.environ.get("SAUDA_MAX_CONCURRENT_HF", "4"))
49
+
50
+ # Circuit breaker: trip after N consecutive HF errors, stay tripped for K seconds.
51
+ CB_ERROR_THRESHOLD = int(os.environ.get("SAUDA_CB_ERRORS", "3"))
52
+ CB_COOLDOWN_SEC = int(os.environ.get("SAUDA_CB_COOLDOWN", "300"))
53
+
54
+ # Reject prompts longer than this many chars (anti-injection ballooning).
55
+ MAX_PROMPT_CHARS = int(os.environ.get("SAUDA_MAX_PROMPT_CHARS", "4000"))
56
+
57
+ STATE_FILE = Path(os.environ.get("SAUDA_SAFETY_STATE", "runs/safety_state.json"))
58
+
59
+
60
+ # ── Internal state ────────────────────────────────────────────────────
61
+
62
+ _lock = threading.Lock()
63
+
64
+ # IP β†’ deque[float timestamps]
65
+ _ip_calls: dict[str, deque[float]] = {}
66
+
67
+ # Global concurrency counter.
68
+ _inflight = 0
69
+
70
+ # Circuit breaker state.
71
+ _consecutive_errors = 0
72
+ _cb_open_until: float = 0.0
73
+
74
+ # Daily counter: { "utc_date": "YYYY-MM-DD", "calls": int }
75
+ _daily = {"utc_date": "", "calls": 0}
76
+
77
+ # Total spend trace for ops (resets on restart, not safety-critical).
78
+ _lifetime = {"hf_calls": 0, "hf_errors": 0, "ollama_calls": 0, "rule_calls": 0,
79
+ "blocked_daily": 0, "blocked_ip": 0, "blocked_concurrency": 0,
80
+ "blocked_circuit": 0, "blocked_prompt": 0}
81
+
82
+
83
def _today() -> str:
    """Return the current UTC date formatted as ``YYYY-MM-DD``."""
    utc_now = time.gmtime()
    return time.strftime("%Y-%m-%d", utc_now)
85
+
86
+
87
def _load_state() -> None:
    """Restore today's HF call counter from ``STATE_FILE``, if usable.

    The persisted counter is adopted only when the file holds a JSON object
    whose ``utc_date`` equals today's UTC date; a stale or unreadable file
    leaves the in-memory counter untouched. Best-effort: every error is
    swallowed so a bad state file cannot break module import.
    """
    global _daily
    if STATE_FILE.exists():
        try:
            payload = json.loads(STATE_FILE.read_text())
            is_current = isinstance(payload, dict) and payload.get("utc_date") == _today()
            if is_current:
                _daily = {
                    "utc_date": payload["utc_date"],
                    "calls": int(payload.get("calls", 0)),
                }
        except Exception:
            pass
97
+
98
+
99
def _persist_state() -> None:
    """Write the daily counter to ``STATE_FILE`` atomically (best-effort).

    The JSON is written to a sibling temp file and then renamed over the
    real file. A crash in the middle of a write therefore cannot leave a
    truncated state file behind; ``_load_state`` would ignore a truncated
    file, which would hand out a fresh daily budget after restart.

    Errors are deliberately swallowed: failing to persist must never block
    or fail the request that triggered it.
    """
    try:
        STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
        tmp_path.write_text(json.dumps(_daily))
        # Path.replace overwrites the destination in a single rename.
        tmp_path.replace(STATE_FILE)
    except Exception:
        pass
105
+
106
+
107
+ _load_state()
108
+
109
+
110
+ # ── Public API ────────────────────────────────────────────────────────
111
+
112
+
113
class HFCallDenied(Exception):
    """Signal that a safety gate refused an HF call; the caller should fall back.

    Attributes:
        reason: Human-readable explanation (also the exception message).
        gate: Identifier of the gate that refused the call.
    """

    def __init__(self, reason: str, gate: str):
        self.gate = gate
        self.reason = reason
        Exception.__init__(self, reason)
119
+
120
+
121
def check_prompt_size(text: str) -> None:
    """Refuse prompts longer than ``MAX_PROMPT_CHARS`` characters.

    Raises:
        HFCallDenied: with ``gate="prompt_size"`` when the prompt is too
            long. The ``blocked_prompt`` lifetime counter is bumped first.
    """
    size = len(text)
    if size <= MAX_PROMPT_CHARS:
        return

    with _lock:
        _lifetime["blocked_prompt"] += 1
    raise HFCallDenied(
        f"prompt {size} chars > cap {MAX_PROMPT_CHARS}",
        gate="prompt_size",
    )
130
+
131
+
132
def acquire_hf_slot(client_ip: Optional[str] = None) -> None:
    """Check every safety gate and reserve an in-flight slot for one HF call.

    Gates are evaluated in order under the module lock: daily cap, circuit
    breaker, global concurrency, then the per-IP sliding windows. Only when
    all of them pass are the in-flight, daily and lifetime counters bumped.

    The caller MUST call ``release_hf_slot(success=...)`` afterwards (in a
    ``finally``) whenever this function returns normally.

    Args:
        client_ip: Address used for per-IP rate limiting. ``None`` or an
            empty string skips the per-IP gate.

    Raises:
        HFCallDenied: if any gate trips; ``.gate`` names which one.
    """
    global _inflight
    now = time.time()
    today = _today()

    with _lock:
        # 1) Roll over the daily counter at UTC midnight.
        if _daily["utc_date"] != today:
            _daily["utc_date"] = today
            _daily["calls"] = 0
            _persist_state()

        # 2) Daily hard cap.
        if _daily["calls"] >= MAX_HF_CALLS_PER_DAY:
            _lifetime["blocked_daily"] += 1
            raise HFCallDenied(
                f"daily HF cap {MAX_HF_CALLS_PER_DAY} reached",
                gate="daily_cap",
            )

        # 3) Circuit breaker.
        if now < _cb_open_until:
            _lifetime["blocked_circuit"] += 1
            raise HFCallDenied(
                f"circuit breaker open for {int(_cb_open_until - now)}s more",
                gate="circuit_breaker",
            )

        # 4) Concurrency.
        if _inflight >= MAX_CONCURRENT_HF:
            _lifetime["blocked_concurrency"] += 1
            raise HFCallDenied(
                f"concurrent in-flight cap {MAX_CONCURRENT_HF} reached",
                gate="concurrency",
            )

        # 5) Per-IP sliding windows.
        if client_ip:
            dq = _ip_calls.setdefault(client_ip, deque())

            # Evict only timestamps older than the LONGEST window. Trimming
            # with each window's own cutoff would let the shortest window
            # delete the history the longer windows need to count, which
            # silently disables the hourly and daily per-IP limits.
            longest_window = max((w for w, _ in IP_LIMITS), default=0)
            oldest_relevant = now - longest_window
            while dq and dq[0] < oldest_relevant:
                dq.popleft()

            for window_s, max_calls in IP_LIMITS:
                cutoff = now - window_s
                count_in_window = sum(1 for t in dq if t >= cutoff)
                if count_in_window >= max_calls:
                    _lifetime["blocked_ip"] += 1
                    raise HFCallDenied(
                        f"ip {client_ip} hit {max_calls}/{window_s}s",
                        gate=f"ip_rate_{window_s}s",
                    )
            dq.append(now)

        # All gates passed — reserve.
        _inflight += 1
        _daily["calls"] += 1
        _lifetime["hf_calls"] += 1
        # Persist every 10 calls to keep disk writes cheap but bounded.
        if _daily["calls"] % 10 == 0:
            _persist_state()
196
+
197
+
198
def release_hf_slot(success: bool) -> None:
    """Release an in-flight HF slot and feed the outcome to the circuit breaker.

    A success clears the consecutive-error streak. A failure extends it and,
    once the streak reaches ``CB_ERROR_THRESHOLD``, opens the breaker for
    ``CB_COOLDOWN_SEC`` seconds from now.
    """
    global _inflight, _consecutive_errors, _cb_open_until
    with _lock:
        # Clamp at zero so an unmatched release cannot drive the count negative.
        _inflight = max(0, _inflight - 1)

        if success:
            _consecutive_errors = 0
            return

        _consecutive_errors += 1
        _lifetime["hf_errors"] += 1
        if _consecutive_errors >= CB_ERROR_THRESHOLD:
            _cb_open_until = time.time() + CB_COOLDOWN_SEC
210
+
211
+
212
def note_fallback(kind: str) -> None:
    """Count one use of a non-HF backend (surfaced via the stats snapshot).

    Only ``"ollama"`` and ``"rule"`` are tracked; any other ``kind`` is
    ignored.
    """
    counter_key = {"ollama": "ollama_calls", "rule": "rule_calls"}.get(kind)
    with _lock:
        if counter_key is not None:
            _lifetime[counter_key] += 1
219
+
220
+
221
def stats() -> dict[str, Any]:
    """Return a snapshot of the safety state, taken under the module lock.

    Mutable containers (daily and lifetime counters) are copied so callers
    cannot mutate module state through the returned dict.
    """
    with _lock:
        now = time.time()

        snapshot: dict[str, Any] = {}
        snapshot["daily"] = dict(_daily)
        snapshot["daily_cap"] = MAX_HF_CALLS_PER_DAY
        snapshot["inflight"] = _inflight
        snapshot["concurrency_cap"] = MAX_CONCURRENT_HF
        snapshot["circuit_breaker_open"] = now < _cb_open_until
        snapshot["circuit_breaker_open_for_s"] = max(0, int(_cb_open_until - now))
        snapshot["consecutive_errors"] = _consecutive_errors
        snapshot["lifetime"] = dict(_lifetime)
        snapshot["ip_limits"] = [
            {"window_s": window_s, "max_calls": max_calls}
            for window_s, max_calls in IP_LIMITS
        ]
        snapshot["tracked_ips"] = len(_ip_calls)
        return snapshot
server/sauda_buyer.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Live Sauda buyer endpoints for the /sell page.
2
+
3
+ Two backends:
4
+ - "hf" β†’ POST to a Hugging Face Inference Endpoint (production)
5
+ - "ollama" β†’ POST to a local ollama server (fallback / dev)
6
+
7
+ Configuration via env-vars:
8
+ SAUDA_BACKEND β€” "hf" (default), "ollama", or "rule" (skip LLM)
9
+ SAUDA_HF_URL β€” full HF Inference Endpoint URL, e.g.
10
+ "https://abc123.us-east-1.aws.endpoints.huggingface.cloud"
11
+ SAUDA_HF_TOKEN β€” HF token with read access to the endpoint
12
+ SAUDA_OLLAMA_URL β€” ollama base URL (default http://localhost:11434)
13
+ SAUDA_OLLAMA_MODEL β€” ollama tag (default "bestdealbot")
14
+
15
+ Both paths render the buyer's observation through the same prompt the eval
16
+ harness uses (DEFAULT_SYSTEM_PROMPT + format_observation), parse the action
17
+ via parse_action, and apply the same Bayesian seller-tell steering as the
18
+ v2 evaluation runs. Result: the /sell page sees the exact same buyer the
19
+ research numbers are based on, just exposed over HTTP instead of in-process.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import json
25
+ import os
26
+ import re
27
+ from typing import Any, Optional
28
+
29
+ import requests
30
+
31
+ from bazaarbot_env import (
32
+ DEFAULT_SYSTEM_PROMPT,
33
+ format_observation,
34
+ parse_action,
35
+ steer_bayesian_action,
36
+ )
37
+
38
+ from .safety import (
39
+ HFCallDenied,
40
+ acquire_hf_slot,
41
+ check_prompt_size,
42
+ note_fallback,
43
+ release_hf_slot,
44
+ )
45
+
46
+
47
+ # ── Helpers ─────────────────────────────────────────────────────────
48
+
49
+
50
def _build_prompt(obs_dict: dict[str, Any]) -> tuple[str, str]:
    """Build the ``(system, user)`` message pair for chat-style backends.

    The system message is ``DEFAULT_SYSTEM_PROMPT``; the user message is the
    observation rendered by ``format_observation``.
    """
    user_message = format_observation(obs_dict)
    return DEFAULT_SYSTEM_PROMPT, user_message
53
+
54
+
55
def _post_json(url: str, payload: dict[str, Any], headers: dict[str, str], timeout: int = 30) -> dict:
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON body.

    Raises whatever ``requests`` raises for transport errors, and an HTTP
    error (via ``raise_for_status``) for non-success status codes.
    """
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    response.raise_for_status()
    decoded = response.json()
    return decoded
59
+
60
+
61
+ # ── HF Inference Endpoint backend ────────────────────────────────────
62
+
63
+
64
def _hf_chat(system: str, user: str, *, max_new_tokens: int = 96, temperature: float = 0.6) -> str:
    """Query the HF Inference Endpoint and return the generated text.

    Endpoints may expose OpenAI-compatible chat completions or the HF native
    text-generation API depending on how they were deployed. The
    OpenAI-compatible route is tried first; on any failure the native
    payload is posted to the base URL instead.

    Raises:
        RuntimeError: if ``SAUDA_HF_URL`` or a token (``SAUDA_HF_TOKEN`` /
            ``HF_TOKEN``) is missing, or if the native response has an
            unexpected shape. Errors from the native request propagate.
    """
    base_url = os.environ.get("SAUDA_HF_URL", "").rstrip("/")
    api_token = os.environ.get("SAUDA_HF_TOKEN") or os.environ.get("HF_TOKEN")
    if not (base_url and api_token):
        raise RuntimeError("SAUDA_HF_URL and SAUDA_HF_TOKEN must be set")

    auth_headers = {"Authorization": f"Bearer {api_token}", "Content-Type": "application/json"}

    # Attempt 1: OpenAI-compatible chat completions.
    try:
        completion = _post_json(
            base_url + "/v1/chat/completions",
            {
                "messages": [
                    {"role": "system", "content": system},
                    {"role": "user", "content": user},
                ],
                "max_tokens": max_new_tokens,
                "temperature": temperature,
                "top_p": 0.9,
            },
            auth_headers,
        )
        return completion["choices"][0]["message"]["content"]
    except Exception:
        # Any failure here (transport, HTTP status, response shape) means
        # the native route below gets a turn.
        pass

    # Attempt 2: HF native text-generation.
    generation_params = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": 0.9,
        "return_full_text": False,
    }
    native_request = {
        "inputs": f"{system}\n\n{user}\n",
        "parameters": generation_params,
    }
    data = _post_json(base_url, native_request, auth_headers)

    # The native API may answer with a list of generations or a single object.
    if isinstance(data, list):
        if data and "generated_text" in data[0]:
            return data[0]["generated_text"]
    elif isinstance(data, dict):
        if "generated_text" in data:
            return data["generated_text"]
    raise RuntimeError(f"Unexpected HF endpoint response shape: {str(data)[:200]}")
111
+
112
+
113
+ # ── Ollama backend ───────────────────────────────────────────────────
114
+
115
+
116
def _ollama_chat(system: str, user: str, *, max_new_tokens: int = 96, temperature: float = 0.6) -> str:
    """Send a chat request to an ollama server and return the reply text.

    The base URL comes from ``SAUDA_OLLAMA_URL`` (default
    ``http://localhost:11434``) and the model tag from
    ``SAUDA_OLLAMA_MODEL`` (default ``bestdealbot``). Returns an empty
    string when the response carries no message content.
    """
    base_url = os.environ.get("SAUDA_OLLAMA_URL", "http://localhost:11434").rstrip("/")
    model_tag = os.environ.get("SAUDA_OLLAMA_MODEL", "bestdealbot")

    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    sampling_options = {
        "temperature": temperature,
        "top_p": 0.9,
        "num_predict": max_new_tokens,
    }
    request_body = {
        "model": model_tag,
        "messages": conversation,
        "stream": False,
        "options": sampling_options,
    }

    reply = _post_json(f"{base_url}/api/chat", request_body, {}, timeout=60)
    message = reply.get("message", {})
    return message.get("content", "")
136
+
137
+
138
+ # ── Public entrypoint ────────────────────────────────────────────────
139
+
140
+
141
def sauda_action(
    obs_dict: dict[str, Any],
    *,
    backend: Optional[str] = None,
    use_steering: bool = True,
    client_ip: Optional[str] = None,
) -> dict[str, Any]:
    """Get a buyer action from Sauda v2.

    Returns dict with keys: action ("offer"|"accept"|"walk"), price (float|None),
    message (str), backend (str echoing which path served), error (str if any).

    Never raises — falls back through HF → ollama → rule. The /sell page is
    interactive and a 500 mid-demo is worse than a dumb fallback. Prompt
    construction and numeric fields of `obs_dict` are guarded for the same
    reason: a malformed observation degrades to the rule-based offer.

    Safety gates (rate/spend/concurrency caps) wrap the HF path; if any trips,
    we silently downgrade to ollama and don't tell the user. `client_ip` is
    used for per-IP rate-limiting; pass None for trusted server-internal calls.

    Args:
        obs_dict: Observation for the current negotiation turn. Keys read here:
            ``own_private_budget``, ``seller_asking_price``,
            ``opponent_last_offer``, ``own_last_offer``.
        backend: "hf", "ollama" or "rule"; defaults to the SAUDA_BACKEND
            environment variable, then "hf".
        use_steering: When true, pass the action through
            ``steer_bayesian_action`` (best effort).
        client_ip: Forwarded to ``acquire_hf_slot``.
    """
    chosen = (backend or os.environ.get("SAUDA_BACKEND") or "hf").lower()

    text = ""
    err: Optional[str] = None
    served_by = chosen

    def _note(existing: Optional[str], extra: str) -> str:
        """Append `extra` to an existing error note, if there is one."""
        return f"{existing}; {extra}" if existing else extra

    def _num(*keys: str, default: float = 100.0) -> float:
        """First truthy, float-convertible obs value among `keys`, else `default`."""
        for key in keys:
            value = obs_dict.get(key)
            if not value:
                continue
            try:
                return float(value)
            except (TypeError, ValueError):
                continue
        return default

    def _try_hf() -> str:
        """HF path with safety gates. Raises on any failure (caller falls back)."""
        check_prompt_size(system + user)
        acquire_hf_slot(client_ip=client_ip)
        ok = False
        try:
            out = _hf_chat(system, user)
            ok = True
            return out
        finally:
            # Always hand the slot back, reporting whether the call succeeded.
            release_hf_slot(success=ok)

    def _try_ollama() -> str:
        out = _ollama_chat(system, user)
        note_fallback("ollama")
        return out

    try:
        system, user = _build_prompt(obs_dict)
    except Exception as e:
        # Without a prompt no model backend can be called; go straight to rule.
        system = user = ""
        prompt_ready = False
        err = f"prompt build failed: {type(e).__name__}: {str(e)[:160]}"
        served_by = "rule"
        note_fallback("rule")
    else:
        prompt_ready = True

    if prompt_ready:
        try:
            if chosen == "hf":
                text = _try_hf()
            elif chosen == "ollama":
                text = _try_ollama()
            elif chosen == "rule":
                note_fallback("rule")
                text = ""  # forces fallback path below
            else:
                raise RuntimeError(f"unknown SAUDA_BACKEND: {chosen}")
        except HFCallDenied as e:
            # Safety gate tripped. Silently downgrade to ollama; if that fails
            # too, the rule-based fallback below kicks in.
            err = f"hf gated ({e.gate}); using ollama"
            served_by = "ollama"
            try:
                text = _try_ollama()
            except Exception as e2:
                err = f"hf gated ({e.gate}); ollama also failed: {type(e2).__name__}"
                served_by = "rule"
                note_fallback("rule")
        except Exception as e:
            err = f"{chosen} backend failed: {type(e).__name__}: {str(e)[:160]}"
            served_by = "ollama" if chosen == "hf" else f"{chosen}+fallback"
            # If primary was HF, try ollama before giving up.
            if chosen == "hf":
                try:
                    text = _try_ollama()
                except Exception as e2:
                    err = f"hf failed; ollama also failed: {type(e2).__name__}"
                    served_by = "rule"
                    note_fallback("rule")

    fallback_price = _num("own_private_budget") * 0.3
    if text:
        action = parse_action(text, fallback_price=fallback_price)
        action.pop("_parse_error", None)
    else:
        if served_by in ("hf", "ollama"):
            # The backend answered with no text, so the rule path is what
            # actually serves; report that instead of the model backend.
            err = _note(err, f"{served_by} returned no text; using rule")
            served_by = "rule"
            note_fallback("rule")
        # Conservative rule-based fallback: open at 35% of ask, then close a
        # quarter of the remaining gap to the ask on each later turn.
        ask = _num("seller_asking_price", "opponent_last_offer")
        raw_last = obs_dict.get("own_last_offer")
        try:
            last = None if raw_last is None else float(raw_last)
        except (TypeError, ValueError):
            last = None  # unreadable previous offer: treat as an opening turn
        if last is None:
            price = round(ask * 0.35, 2)
        else:
            price = round(last + (ask - last) * 0.25, 2)
        action = {"action": "offer", "price": price, "message": ""}

    if use_steering:
        try:
            action = steer_bayesian_action(obs_dict, action)
        except Exception as e:
            # Steering is an optional refinement: keep the unsteered action,
            # but leave a trace instead of swallowing the failure.
            err = _note(err, f"steering failed: {type(e).__name__}")

    out: dict[str, Any] = {
        "action": str(action.get("action", "offer")),
        "price": action.get("price"),
        "message": action.get("message") or "",
        "backend": served_by,
    }
    if err:
        out["error"] = err
    return out
247
+
248
+
249
def health() -> dict[str, Any]:
    """Quick reachability probe for both backends. Used by /sauda/health.

    Never raises: every probe failure is reported in the returned dict.

    Returns:
        A dict that always has ``active_backend``, ``hf_configured``,
        ``ollama_url``, ``ollama_model`` and ``ollama_ok``. Also present when
        applicable: ``hf_ok`` with ``hf_status`` or ``hf_error`` (only when HF
        is configured), ``ollama_has_model`` (only on a 200 from ollama),
        ``ollama_error``, and ``safety`` (when the safety stats are available).
    """
    hf_url = os.environ.get("SAUDA_HF_URL", "")
    hf_token = os.environ.get("SAUDA_HF_TOKEN") or os.environ.get("HF_TOKEN")
    out: dict[str, Any] = {
        "active_backend": (os.environ.get("SAUDA_BACKEND") or "hf").lower(),
        "hf_configured": bool(hf_url) and bool(hf_token),
        "ollama_url": os.environ.get("SAUDA_OLLAMA_URL", "http://localhost:11434"),
        "ollama_model": os.environ.get("SAUDA_OLLAMA_MODEL", "bestdealbot"),
    }

    # Probe HF (skip if not configured)
    if out["hf_configured"]:
        try:
            r = requests.get(
                hf_url.rstrip("/") + "/health",
                headers={"Authorization": f"Bearer {hf_token}"},
                timeout=5,
            )
            # Any non-5xx answer counts as "ok" here.
            out["hf_ok"] = r.status_code < 500
            out["hf_status"] = r.status_code
        except Exception as e:
            out["hf_ok"] = False
            out["hf_error"] = f"{type(e).__name__}: {str(e)[:120]}"

    # Probe Ollama
    try:
        # Strip a trailing slash the same way the chat call does, so both hit
        # the same URL shape.
        host = out["ollama_url"].rstrip("/")
        r = requests.get(f"{host}/api/tags", timeout=3)
        out["ollama_ok"] = r.status_code == 200
        if r.status_code == 200:
            model = out["ollama_model"]
            models = r.json().get("models") or []
            tags = [str(m.get("name") or "") for m in models if isinstance(m, dict)]
            # Match the exact name or "<model>:<tag>"; a bare prefix test
            # would also accept unrelated models such as "<model>2".
            out["ollama_has_model"] = any(
                t == model or t.startswith(model + ":") for t in tags
            )
    except Exception as e:
        out["ollama_ok"] = False
        out["ollama_error"] = f"{type(e).__name__}: {str(e)[:120]}"

    # Safety / spend stats (ops use only — don't expose details to UI).
    try:
        from .safety import stats as _safety_stats
        out["safety"] = _safety_stats()
    except Exception:
        # Deliberately best effort: the probe result stands without the stats.
        pass
    return out