mirror of
https://github.com/marvinscham/masterthesis-playground.git
synced 2026-03-22 00:12:42 +01:00
RAFT updates, BERTopic config, cleanup
This commit is contained in:
@@ -10,11 +10,25 @@ from sentence_transformers import SentenceTransformer
|
||||
from tqdm import tqdm
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
# System prompt describing the traveler persona; it is sent as the content of
# the "system" chat message. The leading newline after the opening quotes and
# the blank lines between paragraphs are part of the prompt text.
SYSTEM_PERSONA = """
You are responding as a culturally and spiritually motivated traveler in Bali.

Adopt the perspective of a reflective, experienced visitor who prioritizes ritual meaning, cultural integrity, spiritual atmosphere, and respectful engagement over entertainment, convenience, or social media appeal.

When answering:

- Emphasize cultural depth, ritual context, symbolism, and spiritual atmosphere.
- Reflect on authenticity and the tension between sacred meaning and tourism.
- Weigh crowding, commercialization, and infrastructure in a nuanced way rather than giving extreme judgments.
- Frame value primarily in emotional, cultural, or spiritual terms — not primarily in price or comfort.
- Show awareness of appropriate visitor behavior and respect for local practices.
- Avoid generic travel advice, promotional language, or itinerary-style responses.
- Write in a thoughtful, first-person perspective.
- Provide reasoned, differentiated answers rather than short summaries.
- Do not list bullet points unless explicitly asked.
- Keep answers focused on the question.

Maintain consistency with this identity across all responses.
"""
|
||||
|
||||
|
||||
@@ -35,14 +49,29 @@ def retrieve(index, embedder, query, top_k=6):
|
||||
@torch.no_grad()
def generate_text(model, tok, messages, max_new_tokens=220, temperature=0.7):
    """Generate one chat completion and return only the newly produced text.

    Args:
        model: Object exposing ``device`` and ``generate(**kwargs)``.
        tok: Tokenizer exposing ``apply_chat_template``, ``decode``,
            ``eos_token_id`` and ``pad_token_id``.
        messages: Chat messages handed to the tokenizer's chat template.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. ``None`` or a value <= 0 selects
            greedy decoding instead of sampling.

    Returns:
        The decoded continuation with the prompt tokens sliced off, special
        tokens skipped, and surrounding whitespace stripped.
    """
    # Using tokenizer chat template where available
    enc = tok.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    )

    # The template call may hand back a bare tensor of ids or a mapping that
    # carries "input_ids" (and possibly "attention_mask"); accept both.
    if isinstance(enc, torch.Tensor):
        input_ids = enc.to(model.device)
        attention_mask = torch.ones_like(input_ids)
    else:
        input_ids = enc["input_ids"].to(model.device)
        attention_mask = enc.get("attention_mask")
        if attention_mask is None:
            # Single unpadded prompt: every position is a real token.
            attention_mask = torch.ones_like(input_ids)
        attention_mask = attention_mask.to(model.device)

    # Tokenizers without a dedicated pad token report None; reuse EOS so
    # generate() always receives an explicit pad id.
    pad_token_id = tok.pad_token_id
    if pad_token_id is None:
        pad_token_id = tok.eos_token_id

    gen_kwargs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "max_new_tokens": max_new_tokens,
        "eos_token_id": tok.eos_token_id,
        "pad_token_id": pad_token_id,
    }
    if temperature is not None and temperature > 0:
        gen_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
    else:
        # A non-positive temperature is not a valid sampling setting;
        # fall back to deterministic greedy decoding.
        gen_kwargs["do_sample"] = False

    out = model.generate(**gen_kwargs)

    # Drop the prompt prefix so only the continuation is decoded.
    prompt_len = input_ids.shape[1]
    return tok.decode(out[0][prompt_len:], skip_special_tokens=True).strip()
|
||||
|
||||
@@ -57,7 +86,7 @@ def main():
|
||||
ap.add_argument("--n_examples", type=int, default=5000)
|
||||
ap.add_argument("--top_k", type=int, default=6)
|
||||
ap.add_argument("--n_distractors", type=int, default=3)
|
||||
ap.add_argument("--seed", type=int, default=7)
|
||||
ap.add_argument("--seed", type=int, default=42)
|
||||
args = ap.parse_args()
|
||||
|
||||
random.seed(args.seed)
|
||||
@@ -89,7 +118,7 @@ def main():
|
||||
{"role": "system", "content": SYSTEM_PERSONA},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Create ONE realistic traveler question about Bali that can be answered using ONLY the CONTEXT.\n\n"
|
||||
"content": "Create ONE realistic question from the perspective of a culturally and spiritually interested traveler in Bali that can be answered using ONLY the CONTEXT.\n\n"
|
||||
f"CONTEXT:\n{gold_text}\n\n"
|
||||
"Return only the question.",
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user