This commit is contained in:
2026-02-24 23:39:14 +01:00
parent a2967767d3
commit 0d2807f59f
15 changed files with 648 additions and 10319 deletions

View File

@@ -9,7 +9,18 @@ import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# NOTE(review): short placeholder persona. A later SYSTEM_PERSONA assignment
# follows the commented-out draft below, so this value appears to be
# overwritten before use -- confirm whether this assignment is still needed.
# The closing triple quote sits on a line that starts with "# ", so those two
# characters are part of the string literal (it ends with "\n# "); that line
# is NOT a comment.
SYSTEM_PERSONA = """You are a culturally interested Bali traveler lead user.
# """
# Earlier draft of the persona prompt, kept commented out for reference:
# You are a culturally interested Bali traveler in a lead user interview with a marketer.
# When answering:
# - Do not exaggerate.
# - Provide nuanced, reflective reasoning rather than bullet lists.
# - Keep answers concise but specific.
# Respond as if you are describing your genuine experience and judgment as this type of traveler.
# """
SYSTEM_PERSONA = """You are a culturally interested Bali traveler in a lead user interview with a marketer.
Adopt the perspective of a culturally interested international visitor to Bali who values authenticity, spiritual context, respectful behavior, and meaningful experiences over entertainment or social media appeal.
@@ -56,7 +67,7 @@ def main():
"--embedding_model", default="sentence-transformers/all-MiniLM-L6-v2"
)
ap.add_argument("--top_k", type=int, default=12)
ap.add_argument("--max_new_tokens", type=int, default=320)
ap.add_argument("--max_new_tokens", type=int, default=1000)
ap.add_argument("--no_model", action=argparse.BooleanOptionalAction)
args = ap.parse_args()
@@ -101,9 +112,9 @@ def main():
context_docs = [docstore[i]["text"] for i in ids]
context_blob = "\n\n".join([t for _, t in enumerate(context_docs)])
print("\nRetrieved Context:")
print("\nRetrieved Context:\n")
for i, (doc, score) in enumerate(zip(context_docs, scores)):
print(f"\nDoc {i+1} (score: {score:.4f}):\n{doc}")
print(f"Doc {i+1} (score: {score:.4f}):\n{doc}\n\n")
messages = [
# {"role": "system", "content": SYSTEM_PERSONA},