mirror of
https://github.com/marvinscham/masterthesis-playground.git
synced 2026-03-22 00:12:42 +01:00
561 lines
19 KiB
Python
561 lines
19 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
|
||
"""
|
||
Generate 300–1000+ English interview questions targeted ONLY at culturally/spiritually
|
||
interested Bali tourists (Lead Users), covering 5 cognitive destination image dimensions:
|
||
- Natural Attractions
|
||
- Atmosphere
|
||
- Social Environment
|
||
- Infrastructure
|
||
- Value for Money
|
||
|
||
Key constraint:
|
||
- Every prompt must be meaningful for culture/spirituality-first travelers.
|
||
- Avoid party/shopping/hedonistic positioning.
|
||
- Include etiquette, authenticity, sacredness, commodification, meaning-making, reflection.
|
||
|
||
Outputs:
|
||
- JSONL: {"dimension": "...", "type": "...", "prompt": "...", "tags": [...]}
|
||
- or TXT: one prompt per line
|
||
"""
|
||
|
||
import argparse
|
||
import json
|
||
import random
|
||
import re
|
||
from typing import Dict, List, Tuple
|
||
|
||
# The five destination-image dimensions every prompt is attached to.
# generate_prompts() balances coverage across this list, and each entry
# must also be a key of DIM_THEMES (it is indexed as DIM_THEMES[d]).
DIMENSIONS = [
    "Natural Attractions",
    "Atmosphere",
    "Social Environment",
    "Infrastructure",
    "Value for Money",
]
|
||
|
||
# -----------------------------
|
||
# Segment-specific building blocks
|
||
# -----------------------------
|
||
# Keep places generic (no need to hallucinate specific proper nouns)
|
||
# Nature-oriented place hints. pick_place_hint_for_dim() draws from this
# list for the "Natural Attractions" dimension, and generate_prompts() uses
# it for the nature half of "route" prompts.
NATURE_FOR_MEANING = [
    "rice terraces that feel lived-in rather than staged",
    "waterfalls approached with a quiet, respectful mood",
    "volcano viewpoints that invite reflection at dawn",
    "jungle walks where you notice offerings and small shrines",
    "lake areas that feel calm and contemplative",
    "coastal paths that feel like a moving meditation",
    "hot springs experienced as restoration rather than spectacle",
]
|
||
|
||
# Cultural/spiritual place hints. pick_place_hint_for_dim() draws from this
# list for every dimension other than "Natural Attractions", and
# generate_prompts() uses it for the culture half of "route" prompts.
CULTURE_SPIRIT_SPACES = [
    "temple courtyards and entry paths",
    "a village ceremony you observe respectfully",
    "a traditional market where everyday ritual shows up in small ways",
    "a dance performance where you try to read symbolism",
    "a craft workshop focused on meaning and lineage, not souvenirs",
    "a community space where offerings are prepared",
    "a quiet heritage walk where stories feel layered",
]
|
||
|
||
# Etiquette topics inserted into "etiquette" prompts
# (see tmpl_etiquette_scenario); the chosen topic is also stored as a tag.
RITUAL_ETIQUETTE_TOPICS = [
    "dress codes and modesty",
    "offerings and what not to touch",
    "photography boundaries",
    "when to speak vs stay quiet",
    "how to move through a temple without intruding",
    "how to ask questions without turning sacred life into content",
]
|
||
|
||
# Felt outcomes used in "laddering" prompts; each phrase completes the
# clause "...that left you with {meaning}." in tmpl_laddering.
MEANING_MAKING = [
    "a sense of humility",
    "a feeling of gratitude",
    "a moment of awe",
    "a feeling of being a guest",
    "a sense of calm",
    "a quiet emotional reset",
    "a shift in how you see daily life",
    "a stronger respect for local rhythms",
]
|
||
|
||
# Cues for judging authenticity. generate_prompts() concatenates this list
# with CROWDING_COMMODIFICATION and draws one cue for each "contrast" prompt.
AUTHENTICITY_CUES = [
    "how people behave when no one is watching",
    "whether the experience is integrated into local life",
    "how money is handled (transparent vs extractive)",
    "whether rules feel protective or performative",
    "whether the pace allows reflection or pushes consumption",
]
|
||
|
||
# Crowding/commodification cues. generate_prompts() concatenates this list
# with AUTHENTICITY_CUES and draws one cue for each "contrast" prompt.
CROWDING_COMMODIFICATION = [
    "overt commercialization around sacred spaces",
    "crowds that change the emotional tone",
    "performative 'authenticity' for tourists",
    "feeling like sacredness is being packaged",
]
|
||
|
||
# Situational contexts. One is drawn per generation attempt and substituted
# for {context} in the templates that take it (typically after
# "in Bali during").
CONTEXTS = [
    "early morning before the crowds",
    "late afternoon when light softens and things slow down",
    "during a local ceremony where you are clearly a guest",
    "in rainy season when plans change and patience matters",
    "on a quiet weekday compared to a busy weekend",
    "with a local guide who emphasizes respect and context",
    "solo, when you can be more contemplative",
    "as a repeat visitor, noticing subtler layers",
]
|
||
|
||
# Descriptions of the target traveler segment.
# NOTE(review): not referenced by any template or by generate_prompts() in
# the visible code — confirm whether it is still needed.
TRAVELER_PROFILE = [
    "a culture-first traveler",
    "a spirituality-curious traveler",
    "a respectful observer who avoids intrusive tourism",
    "a slow traveler seeking depth over volume",
    "a repeat visitor looking for subtler, less packaged experiences",
]
|
||
|
||
# Scenario constraints as (category, phrasings) pairs. pick_constraint()
# first picks a pair, then one phrasing from it; the category is used as a
# tag and the phrasing is inserted into tradeoff/marketer/route prompts.
CONSTRAINTS = [
    (
        "time",
        [
            "you only have 6 hours but want depth, not a checklist",
            "you have one full day and want it to feel coherent and meaningful",
            "you have three days and want a gentle pace with time for reflection",
            "you can only travel within a short radius and must choose carefully",
        ],
    ),
    (
        "budget",
        [
            "you have a modest budget but still want cultural depth and fairness",
            "you'll pay more if it supports local communities transparently",
            "you want predictable costs and dislike hidden fees around sacred sites",
            "you prefer smaller, community-rooted experiences over pricey packages",
        ],
    ),
    (
        "crowds",
        [
            "you want to avoid crowds because they dilute atmosphere and respect",
            "you can handle crowds if etiquette and sacredness are preserved",
            "you want a balance: one iconic site, mostly quieter, community-rooted places",
            "you get overwhelmed by busy places and need calmer, respectful alternatives",
        ],
    ),
    (
        "weather",
        [
            "it's rainy season and flexibility is part of respectful travel",
            "it's very hot and you need a pace that still feels mindful",
            "visibility is low and your sunrise plan may fail—how do you adapt meaningfully?",
            "roads feel unsafe, so you prioritize fewer moves and deeper presence",
        ],
    ),
    (
        "mobility",
        [
            "you avoid steep stairs but still want meaningful cultural/spiritual moments",
            "you prefer not to ride a scooter and want low-friction transport options",
            "you want minimal walking but still want authenticity and atmosphere",
            "you need frequent rest and prefer fewer transitions",
        ],
    ),
    (
        "ethics",
        [
            "you want to avoid commodifying sacred life",
            "you prioritize local benefit, consent, and respectful boundaries",
            "you avoid experiences that pressure locals to perform for tourists",
            "you want your presence to feel like 'being a guest' not 'taking'",
        ],
    ),
]
|
||
|
||
# (x, y) pairs of competing priorities; one pair is drawn per "tradeoff"
# prompt and rendered as "{x} versus {y}" by tmpl_tradeoff.
TRADEOFFS = [
    ("depth of understanding", "convenience"),
    ("sacredness", "accessibility"),
    ("quiet reflection", "seeing iconic places"),
    ("guided cultural context", "self-guided freedom"),
    ("photography", "presence and respect"),
    ("predictable pricing", "spontaneous discovery"),
    ("community benefit", "personal comfort"),
    ("slow pace", "variety of stops"),
]
|
||
|
||
# (a, b) pairs of settings to compare; one pair is drawn per "contrast"
# prompt and rendered as "Compare {a} versus {b}" by tmpl_contrast.
CONTRASTS = [
    ("a popular temple area", "a quieter village setting"),
    ("a curated tour script", "a guide who shares context and encourages respect"),
    ("a crowded ceremony-adjacent spot", "a calm everyday ritual moment"),
    (
        "a market aisle focused on souvenirs",
        "a market moment that shows daily offerings and rhythm",
    ),
    ("a rushed checklist day", "a slower day with fewer places but deeper presence"),
    ("an 'Instagram moment'", "a moment of quiet meaning that you don't photograph"),
]
|
||
|
||
# Question openers. One is drawn per generation attempt and placed at the
# very start of "single" prompts by tmpl_single_dimension.
INTERVIEW_STYLES = [
    "Tell me about a time when…",
    "Walk me through…",
    "As a culturally/spiritually motivated traveler, how do you…",
    "If you had to advise a tourism marketer focused on respectful cultural travel…",
    "What surprised you about the spiritual or cultural texture of…",
    "What does 'authentic and respectful' look like to you when…",
    "How do you personally decide whether to join, observe, or step back when…",
]
|
||
|
||
# Follow-up questions. generate_prompts() appends one of these to an
# already accepted prompt to create an additional "<type>+probe" variant.
FOLLOWUP_PROBES = [
    "What specifically made it feel respectful or not?",
    "What did you notice first, and what happened next?",
    "How did it change your mood or sense of meaning that day?",
    "What would have improved it without turning it into a spectacle?",
    "What boundary would you not cross again?",
    "What would you tell a marketer to never claim in messaging?",
]
|
||
|
||
# Themes per dimension. generate_prompts() looks up DIM_THEMES[d] for the
# chosen dimension, so the keys here must stay in sync with DIMENSIONS.
DIM_THEMES: Dict[str, List[str]] = {
    "Natural Attractions": [
        "sense of place and meaning",
        "quiet awe vs spectacle",
        "timing for contemplative experience",
        "routes that support reflection",
        "respectful behavior in nature",
        "access vs sacred calm",
    ],
    "Atmosphere": [
        "sacredness and emotional tone",
        "authenticity cues",
        "commercialization pressure",
        "silence, sound, and pace",
        "crowds and reverence",
        "ritual context shaping ambience",
    ],
    "Social Environment": [
        "being a guest and practicing humility",
        "consent and boundaries",
        "guide trust and cultural context",
        "respectful interaction with locals",
        "tourist behavior that disrupts",
        "learning without extracting",
    ],
    "Infrastructure": [
        "signage for etiquette",
        "visitor flow that protects sacred spaces",
        "frictionless but respectful access",
        "toilets/rest areas without degrading atmosphere",
        "transparent ticketing/donations",
        "accessibility with dignity",
    ],
    "Value for Money": [
        "fairness and transparency",
        "donations vs fees",
        "paying for guides as cultural mediation",
        "avoiding extractive 'spiritual packages'",
        "community benefit",
        "what feels worth paying for (context, respect, time)",
    ],
}
|
||
|
||
|
||
# -----------------------------
|
||
# Templates
|
||
# -----------------------------
|
||
def tmpl_single_dimension(
    d: str, theme: str, style: str, place_hint: str, context: str
) -> str:
    """Render a single-dimension experience question.

    Args:
        d: Destination-image dimension the question is framed through.
        theme: Theme within that dimension to focus on.
        style: Interview opener placed at the very start of the prompt.
        place_hint: Generic description of the place or setting.
        context: Situational context (time, company, season, ...).

    Returns:
        The two-sentence prompt text.
    """
    opener = f"{style} your experience with {place_hint} in Bali during {context}. "
    question = f"From a {d} perspective, what stands out about {theme}—and why does it matter to you as a culture/spirit-oriented traveler?"
    return opener + question
|
||
|
||
|
||
def tmpl_laddering(d: str, theme: str, context: str, meaning: str) -> str:
    """Render a laddering question that starts from a felt outcome.

    Args:
        d: Dimension the answer should be framed through.
        theme: Theme within that dimension to focus on.
        context: Situational context of the remembered moment.
        meaning: The feeling or shift the moment left behind.

    Returns:
        The three-sentence prompt text.
    """
    sentences = [
        f"Think about a specific moment in Bali during {context} that left you with {meaning}. ",
        f"What happened, how did you interpret it, and why did it feel meaningful? ",
        f"Frame your answer through {d} (focus on {theme}).",
    ]
    return "".join(sentences)
|
||
|
||
|
||
def tmpl_contrast(d: str, a: str, b: str, context: str, cue: str) -> str:
    """Render a comparison question between two settings.

    Args:
        d: Dimension the comparison is framed through.
        a: First setting.
        b: Second setting.
        context: Situational context shared by both settings.
        cue: Authenticity or commodification cue the answer should use.

    Returns:
        The three-sentence prompt text.
    """
    comparison = f"Compare {a} versus {b} in Bali during {context}. "
    framing = f"In terms of {d}, how do they differ for you as a respectful, culture/spirit-first traveler? "
    cue_request = f"Use {cue} as a cue in your explanation."
    return comparison + framing + cue_request
|
||
|
||
|
||
def tmpl_tradeoff(d1: str, d2: str, x: str, y: str, constraint: str) -> str:
    """Render a trade-off question spanning two dimensions.

    Args:
        d1: First dimension the examples should touch.
        d2: Second dimension the examples should touch.
        x: First competing priority.
        y: Second competing priority.
        constraint: Scenario constraint stated up front.

    Returns:
        The three-sentence prompt text.
    """
    setup = f"Under this constraint: {constraint}. "
    question = f"How do you trade off {x} versus {y} when choosing cultural/spiritual experiences in Bali? "
    instruction = f"Answer with examples touching {d1} and {d2}."
    return "".join((setup, question, instruction))
|
||
|
||
|
||
def tmpl_marketer_advice(d: str, theme: str, constraint: str, dont_claim: str) -> str:
    """Render a question asking the interviewee to advise a tourism marketer.

    Args:
        d: Dimension the advice should address.
        theme: Theme within that dimension to emphasize.
        constraint: Scenario constraint, quoted inside the prompt.
        dont_claim: Example of a marketing claim that should be avoided.

    Returns:
        The prompt text.
    """
    premise = f"If you had to advise a tourism marketer for culturally/spiritually interested travelers: under the constraint '{constraint}', "
    ask = f"what should they understand about {d} (especially {theme})? "
    warning = f"Also: what is one thing they should NOT claim in messaging because it would feel misleading or disrespectful—e.g., {dont_claim}?"
    return premise + ask + warning
|
||
|
||
|
||
def tmpl_etiquette_scenario(d: str, topic: str, context: str) -> str:
    """Render a walk-through question about an etiquette situation.

    Args:
        d: Dimension the answer should be connected to.
        topic: Etiquette topic the situation is about.
        context: Situational context of the situation.

    Returns:
        The three-sentence prompt text.
    """
    pieces = (
        f"Walk me through an etiquette situation related to {topic} in Bali during {context}. ",
        f"What did you do, what did you avoid, and what would you want a marketer to communicate to travelers upfront? ",
        f"Connect it to {d}.",
    )
    return "".join(pieces)
|
||
|
||
|
||
def tmpl_route_design(
    d: str, nature_hint: str, culture_hint: str, constraint: str
) -> str:
    """Render a task asking for a mini day-route under a constraint.

    Args:
        d: Dimension the reasoning should be linked to.
        nature_hint: Nature setting to include in the route.
        culture_hint: Cultural/spiritual setting to include in the route.
        constraint: Scenario constraint the route must respect.

    Returns:
        The prompt text.
    """
    task = f"Design a mini day-route that combines {nature_hint} and {culture_hint} under this constraint: {constraint}. "
    reflection = f"How would you protect atmosphere and respect while still making it accessible to culture/spirit-first travelers? Link your reasoning to {d}."
    return task + reflection
|
||
|
||
|
||
def tmpl_probe_followup(base_q: str, probe: str) -> str:
    """Append a follow-up probe to an existing question.

    Args:
        base_q: The already rendered base question.
        probe: The follow-up question to add.

    Returns:
        ``base_q`` and ``probe`` separated by a single space.
    """
    combined = f"{base_q} {probe}"
    return combined
|
||
|
||
|
||
def pick_constraint(rng: random.Random) -> Tuple[str, str]:
    """Draw one scenario constraint from CONSTRAINTS.

    A (category, phrasings) pair is chosen first, then one phrasing from
    that pair, so two values are consumed from ``rng`` per call.

    Args:
        rng: Random generator used for both draws.

    Returns:
        A ``(category, phrasing)`` tuple.
    """
    category, phrasings = rng.choice(CONSTRAINTS)
    phrasing = rng.choice(phrasings)
    return category, phrasing
|
||
|
||
|
||
def pick_place_hint_for_dim(d: str, rng: random.Random) -> str:
    """Pick a generic place hint suited to the given dimension.

    Args:
        d: Dimension name.
        rng: Random generator used for the draw.

    Returns:
        An entry of NATURE_FOR_MEANING when ``d`` is "Natural Attractions",
        otherwise an entry of CULTURE_SPIRIT_SPACES.
    """
    pool = NATURE_FOR_MEANING if d == "Natural Attractions" else CULTURE_SPIRIT_SPACES
    return rng.choice(pool)
|
||
|
||
|
||
# -----------------------------
|
||
# Generation
|
||
# -----------------------------
|
||
def generate_prompts(
    n: int,
    seed: int = 42,
    add_followups_ratio: float = 0.35,
    ensure_balance: bool = True,
) -> List[Dict]:
    """Generate up to ``n`` unique interview prompts.

    All randomness comes from ``random.Random(seed)``, so a given argument
    set always yields the same list. Generation stops after ``n * 25``
    attempts; if fewer than ``n`` prompts were produced by then, a warning
    is printed.

    Args:
        n: Number of prompts requested.
        seed: Seed for the random generator.
        add_followups_ratio: Probability that an accepted prompt is
            immediately followed by a "+probe" variant of itself.
        ensure_balance: When true, dimensions are taken from a pre-shuffled
            schedule holding each dimension equally often; otherwise the
            dimension is drawn at random for every attempt.

    Returns:
        A list of dicts with the keys "dimension", "type", "prompt" and
        "tags" (in that order), at most ``n`` entries long.
    """
    rng = random.Random(seed)
    segment_tag = "segment:culture-spirit"

    # Question archetypes and their sampling weights, all segment-targeted.
    archetype_weights = {
        "single": 0.24,
        "laddering": 0.18,
        "contrast": 0.16,
        "tradeoff": 0.18,
        "marketer": 0.12,
        "etiquette": 0.08,
        "route": 0.04,
    }
    type_names = list(archetype_weights)
    type_weights = list(archetype_weights.values())

    prompts: List[Dict] = []
    seen = set()

    # Dimension schedule: equal share per dimension, random top-up to n,
    # then shuffled.
    dim_cycle: List[str] = []
    if ensure_balance:
        per_dim = max(1, n // len(DIMENSIONS))
        dim_cycle = [dim for dim in DIMENSIONS for _ in range(per_dim)]
        shortfall = n - len(dim_cycle)
        dim_cycle += [rng.choice(DIMENSIONS) for _ in range(shortfall)]
        rng.shuffle(dim_cycle)

    # Examples of claims a marketer should avoid; used by "marketer" prompts.
    dont_claim_examples = [
        "guaranteed 'authentic spirituality' on demand",
        "a ceremony 'for tourists' as the main attraction",
        "access to sacred spaces without emphasizing etiquette and consent",
        "a 'hidden local ritual' framed as a product",
        "permission to photograph everything",
    ]

    # A prompt is only accepted if its normalized text contains at least one
    # of these segment anchor terms.
    anchors = (
        "respect",
        "sacred",
        "etiquette",
        "meaning",
        "authentic",
        "ceremony",
        "guest",
        "context",
        "spirit",
    )
    whitespace_run = re.compile(r"\s+")

    def add_prompt(obj: Dict) -> bool:
        """Store ``obj`` unless it is a duplicate or lacks an anchor term."""
        key = whitespace_run.sub(" ", obj["prompt"].strip().lower())
        if key in seen or not any(term in key for term in anchors):
            return False
        seen.add(key)
        prompts.append(obj)
        return True

    for _attempt in range(n * 25):
        if len(prompts) >= n:
            break

        if ensure_balance and len(prompts) < len(dim_cycle):
            d = dim_cycle[len(prompts)]
        else:
            d = rng.choice(DIMENSIONS)

        # These draws happen on every attempt, whether or not the chosen
        # archetype uses them; their order fixes the seeded output.
        theme = rng.choice(DIM_THEMES[d])
        style = rng.choice(INTERVIEW_STYLES)
        context = rng.choice(CONTEXTS)
        place_hint = pick_place_hint_for_dim(d, rng)
        c_key, c_val = pick_constraint(rng)

        archetype = rng.choices(type_names, weights=type_weights, k=1)[0]

        dimension_label = d
        if archetype == "single":
            type_label = "single"
            question = tmpl_single_dimension(d, theme, style, place_hint, context)
            tags = [d, theme, context, segment_tag]
        elif archetype == "laddering":
            type_label = "laddering"
            meaning = rng.choice(MEANING_MAKING)
            question = tmpl_laddering(d, theme, context, meaning)
            tags = [d, theme, context, "laddering", segment_tag]
        elif archetype == "contrast":
            type_label = "contrast"
            side_a, side_b = rng.choice(CONTRASTS)
            cue = rng.choice(AUTHENTICITY_CUES + CROWDING_COMMODIFICATION)
            question = tmpl_contrast(d, side_a, side_b, context, cue)
            tags = [d, "contrast", context, segment_tag]
        elif archetype == "tradeoff":
            type_label = "tradeoff"
            d2 = rng.choice([other for other in DIMENSIONS if other != d])
            first, second = rng.choice(TRADEOFFS)
            question = tmpl_tradeoff(d, d2, first, second, c_val)
            # Trade-off prompts span two dimensions and are labelled with both.
            dimension_label = f"{d} + {d2}"
            tags = [d, d2, "tradeoff", c_key, segment_tag]
        elif archetype == "marketer":
            type_label = "marketer_advice"
            dont_claim = rng.choice(dont_claim_examples)
            question = tmpl_marketer_advice(d, theme, c_val, dont_claim)
            tags = [d, theme, "marketer", c_key, segment_tag]
        elif archetype == "etiquette":
            type_label = "etiquette"
            topic = rng.choice(RITUAL_ETIQUETTE_TOPICS)
            question = tmpl_etiquette_scenario(d, topic, context)
            tags = [d, "etiquette", topic, context, segment_tag]
        elif archetype == "route":
            type_label = "route_design"
            nature_hint = rng.choice(NATURE_FOR_MEANING)
            culture_hint = rng.choice(CULTURE_SPIRIT_SPACES)
            question = tmpl_route_design(d, nature_hint, culture_hint, c_val)
            tags = [d, "route", c_key, segment_tag]
        else:
            continue

        entry = {
            "dimension": dimension_label,
            "type": type_label,
            "prompt": question,
            "tags": tags,
        }
        if not add_prompt(entry):
            continue

        # Follow-up probe variant of the prompt that was just accepted.
        if rng.random() < add_followups_ratio and len(prompts) < n:
            probe = rng.choice(FOLLOWUP_PROBES)
            add_prompt(
                {
                    "dimension": entry["dimension"],
                    "type": entry["type"] + "+probe",
                    "prompt": tmpl_probe_followup(entry["prompt"], probe),
                    "tags": entry["tags"] + ["probe"],
                }
            )

    if len(prompts) < n:
        print(f"Warning: only generated {len(prompts)} unique prompts (requested {n}).")

    return prompts[:n]
|
||
|
||
|
||
def main():
    """Parse the command line, generate the prompts and write them to a file.

    The output is either JSONL (one JSON object per line) or TXT (one
    prompt text per line), depending on ``--format``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--n",
        type=int,
        default=600,
        help="Number of prompts to generate (300–1000 recommended).",
    )
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--out", default="culture_spirit_interview_prompts.jsonl")
    parser.add_argument("--format", choices=["jsonl", "txt"], default="jsonl")
    parser.add_argument(
        "--no_balance",
        action="store_true",
        help="Disable balanced coverage across dimensions.",
    )
    parser.add_argument("--followups_ratio", type=float, default=0.35)
    args = parser.parse_args()

    prompts = generate_prompts(
        n=args.n,
        seed=args.seed,
        add_followups_ratio=args.followups_ratio,
        ensure_balance=not args.no_balance,
    )

    # Choose how a single record is turned into one output line.
    if args.format == "jsonl":

        def render(record):
            return json.dumps(record, ensure_ascii=False)

    else:

        def render(record):
            return record["prompt"].strip()

    with open(args.out, "w", encoding="utf-8") as f:
        for record in prompts:
            f.write(render(record) + "\n")

    print(f"Saved {len(prompts)} prompts to: {args.out} ({args.format})")
|
||
|
||
|
||
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|