mirror of
https://github.com/marvinscham/masterthesis-playground.git
synced 2026-03-22 00:12:42 +01:00
Cleanup
This commit is contained in:
577
bertopic/nb_bertopic.py
Normal file
577
bertopic/nb_bertopic.py
Normal file
@@ -0,0 +1,577 @@
|
||||
# ---
|
||||
# jupyter:
|
||||
# jupytext:
|
||||
# text_representation:
|
||||
# extension: .py
|
||||
# format_name: percent
|
||||
# format_version: '1.3'
|
||||
# jupytext_version: 1.18.0
|
||||
# kernelspec:
|
||||
# display_name: .venv
|
||||
# language: python
|
||||
# name: python3
|
||||
# ---
|
||||
|
||||
# %% [markdown]
|
||||
# # Topic Detection: Bali Tourist Reviews
|
||||
#
|
||||
|
||||
# %% [markdown]
|
||||
# ## Preparation
|
||||
#
|
||||
# ### Dependency Loading
|
||||
#
|
||||
|
||||
# %%
|
||||
import json
|
||||
import pickle
|
||||
import re
|
||||
|
||||
import gensim.corpora as corpora
|
||||
import nltk
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import spacy
|
||||
from bertopic.representation import KeyBERTInspired
|
||||
from bertopic.vectorizers import ClassTfidfTransformer
|
||||
from gensim.models.coherencemodel import CoherenceModel
|
||||
from hdbscan import HDBSCAN
|
||||
from nltk.corpus import stopwords
|
||||
from nltk.stem import WordNetLemmatizer
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
from umap import UMAP
|
||||
|
||||
from bertopic import BERTopic
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
|
||||
nltk.download("stopwords")
|
||||
nltk.download("punkt")
|
||||
nltk.download("wordnet")
|
||||
|
||||
# %% [markdown]
|
||||
# ### Parameters and Tracking
|
||||
#
|
||||
|
||||
# %%
|
||||
# Fit a fresh BERTopic model; when False a previously saved model is loaded instead
RECREATE_MODEL = True
# Run the iterative merge of highly similar topics
RECREATE_REDUCED_MODEL = True
# Re-run the text preprocessing; when False the pickled result is loaded
PROCESS_DATA = False
# Reassign outlier documents to topics after fitting
REDUCE_OUTLIERS = True
# Load a saved reduced model; only consulted when RECREATE_REDUCED_MODEL is False
USE_CONDENSED_MODEL = False

DATA_SAMPLE_SIZE = -1 # -1 for all data

# Classical coherence score. Warning: needs swap to not kill your PC
CALCULATE_COHERENCE = False

# Vectorization
# Passed to CountVectorizer as min_df
MIN_DOCUMENT_FREQUENCY = 1
# Upper bound of the CountVectorizer ngram_range (lower bound is 1)
MAX_NGRAM = 2

# HDBSCAN Parameters
# Passed to HDBSCAN as min_cluster_size
MIN_TOPIC_SIZE = 200
# Passed to HDBSCAN as min_samples
MIN_SAMPLES = 25

# UMAP Parameters
N_NEIGHBORS = 15
N_COMPONENTS = 2
MIN_DIST = 0.01

# Topic Modeling
TOP_N_WORDS = 10
MAX_TOPICS = None # forwarded to BERTopic as nr_topics: "auto", or None to skip
|
||||
|
||||
# %% [markdown]
|
||||
# ### Data Loading & Preprocessing
|
||||
#
|
||||
|
||||
# %%
|
||||
# Read the raw reviews once, optionally down-sample the rows, then keep the
# non-missing review texts as a plain list.
raw_frame = pd.read_csv("../data/original/reviews.tab", sep="\t")
if DATA_SAMPLE_SIZE != -1:
    raw_frame = raw_frame.sample(n=DATA_SAMPLE_SIZE)
reviews = raw_frame.review.dropna().to_list()

print(f"Loaded {len(reviews)} reviews")
|
||||
|
||||
# %%
|
||||
# List of NE in Bali for NER enhancement
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so the file is
# decoded the same way regardless of the platform's default locale.
with open("../data/supporting/bali_ner.json", "r", encoding="utf-8") as f:
    bali_places = json.load(f)
bali_places_set = set(bali_places)

# Stop word definition
extra_stopwords = ["bali", "idr", "usd"]
stop_words = set(stopwords.words("english"))
with open("../data/supporting/stopwords-en.json", "r", encoding="utf-8") as f:
    extra_stopwords.extend(json.load(f))

# Custom replacements
# Insertion order matters: the alternation below tries the longer escaped
# sequences before their shorter counterparts.
rep = {
    r"\\n": " ",
    r"\n": " ",
    r'\\"': "",
    r'"': "",
    "mongkey": "monkey",
    "monky": "monkey",
    "verry": "very",
}
# Keys are stored regex-escaped; lookups must escape the matched text the same way
rep = {re.escape(k): v for k, v in rep.items()}
pattern = re.compile("|".join(rep.keys()))

lemmatizer = WordNetLemmatizer()
|
||||
|
||||
|
||||
def preprocess(text):
    """Normalise one review and return its lemmatised tokens as a list.

    The text is lower-cased, passed through the custom replacement table,
    stripped of digits and non-word characters and tagged with spaCy. Only
    nouns, proper nouns, GPE/LOC/FAC entities and known Bali place names are
    kept; stop words and tokens of at most two characters are dropped before
    lemmatisation.
    """
    # Step 1: custom replacements (typos, special cases) on the lower-cased text
    lowered = text.lower()
    replaced = pattern.sub(lambda match: rep[re.escape(match.group(0))], lowered)

    # Step 2: blank out digit runs, then collapse every non-word run to a space
    without_digits = re.sub(r"\d+", " ", replaced)
    cleaned = re.sub(r"\W+", " ", without_digits)

    # Step 3: POS tagging and filtering
    wanted_pos = {"NOUN", "PROPN"}
    wanted_entities = {"GPE", "LOC", "FAC"}
    kept = []
    for token in nlp(cleaned):
        if (
            token.pos_ in wanted_pos
            or token.ent_type_ in wanted_entities
            or token.text in bali_places_set
        ):
            kept.append(token.text)

    # Step 4: stop word removal, then lemmatisation of what is left
    result = []
    for word in kept:
        if word in stop_words or word in extra_stopwords or len(word) <= 2:
            continue
        result.append(lemmatizer.lemmatize(word))
    return result
|
||||
|
||||
|
||||
# %%
|
||||
if PROCESS_DATA:
|
||||
print("Processing reviews...")
|
||||
reviews = [preprocess(review) for review in reviews]
|
||||
|
||||
with open("../data/intermediate/processed_texts.pkl", "wb") as f:
|
||||
pickle.dump(reviews, f)
|
||||
else:
|
||||
with open("../data/intermediate/processed_texts.pkl", "rb") as f:
|
||||
reviews = pickle.load(f)
|
||||
reviews = [
|
||||
" ".join(review) if isinstance(review, list) else review
|
||||
for review in reviews
|
||||
]
|
||||
|
||||
print(reviews[:1])
|
||||
|
||||
# %% [markdown]
|
||||
# ### Pre-calculate Embeddings
|
||||
#
|
||||
|
||||
# %%
|
||||
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
|
||||
embeddings = embedding_model.encode(reviews, show_progress_bar=True)
|
||||
|
||||
# %% [markdown]
|
||||
# ## Model Creation
|
||||
#
|
||||
|
||||
# %% [markdown]
|
||||
# ### Dimensionality Reduction (UMAP)
|
||||
#
|
||||
|
||||
# %%
|
||||
umap_model = UMAP(
|
||||
n_neighbors=N_NEIGHBORS,
|
||||
n_components=N_COMPONENTS,
|
||||
min_dist=MIN_DIST,
|
||||
metric="cosine",
|
||||
low_memory=True,
|
||||
random_state=42,
|
||||
)
|
||||
reduced_embeddings = umap_model.fit_transform(embeddings)
|
||||
|
||||
# %% [markdown]
|
||||
# ### BERTopic Model Creation
|
||||
#
|
||||
|
||||
# %%
|
||||
# Build and fit a new BERTopic model, or load the one saved by an earlier run.
if RECREATE_MODEL:
    ctfidf_model = ClassTfidfTransformer(reduce_frequent_words=True)
    # Vocabulary spans word n-grams from 1 up to MAX_NGRAM
    vectorizer_model = CountVectorizer(
        min_df=MIN_DOCUMENT_FREQUENCY, ngram_range=(1, MAX_NGRAM)
    )

    representation_model = KeyBERTInspired()
    hdbscan_model = HDBSCAN(
        min_cluster_size=MIN_TOPIC_SIZE,
        min_samples=MIN_SAMPLES,
        metric="euclidean",
        cluster_selection_method="eom",
        gen_min_span_tree=True,
        prediction_data=True,
    )

    topic_model = BERTopic(
        embedding_model=embedding_model,
        ctfidf_model=ctfidf_model,
        vectorizer_model=vectorizer_model,
        umap_model=umap_model,
        hdbscan_model=hdbscan_model,
        representation_model=representation_model,
        verbose=True,
        calculate_probabilities=True,
        language="english",
        top_n_words=TOP_N_WORDS,
        nr_topics=MAX_TOPICS,
    )

    # The pre-computed sentence embeddings are handed in instead of being recomputed.
    # NOTE(review): `topics` and `probs` are only bound in this branch; cells that
    # read them later (e.g. `doc_topic_matrix = probs`) need RECREATE_MODEL = True.
    topics, probs = topic_model.fit_transform(reviews, embeddings=embeddings)

    # Custom labels: topic prefix plus three words, joined with " - "
    topic_labels = topic_model.generate_topic_labels(
        nr_words=3, topic_prefix=True, word_length=15, separator=" - "
    )
    topic_model.set_topic_labels(topic_labels)
    BERTopic.save(topic_model, "output/model.bertopic")
else:
    print("Nevermind, loading existing model")
    topic_model = BERTopic.load("output/model.bertopic")
|
||||
|
||||
# %% [markdown]
|
||||
# ## Fine Tuning
|
||||
#
|
||||
# ### Topic Condensation
|
||||
#
|
||||
|
||||
# %%
|
||||
# Iteratively merge topics whose embeddings are nearly identical.
#
# Exactly one pair is merged per pass. merge_topics() changes the set of
# topics, so the similarity matrix is rebuilt before the next pair is picked;
# continuing with indices from the old matrix could merge unrelated topics.
# The loop ends as soon as a full scan produces no successful merge, which
# also guarantees termination when every candidate merge fails.
if RECREATE_REDUCED_MODEL:
    done = False
    iteration = 1
    while not done:
        print(f"Iteration {iteration}")
        iteration += 1
        # Row 0 is dropped so that matrix index i is used as topic id i.
        # NOTE(review): this presumes row 0 belongs to the outlier topic -1 - confirm.
        similarity_matrix = cosine_similarity(
            np.array(topic_model.topic_embeddings_)[1:, :]
        )
        merged = False

        for i in range(similarity_matrix.shape[0]):
            for j in range(i + 1, similarity_matrix.shape[1]):
                sim = similarity_matrix[i, j]
                if sim <= 0.9:
                    continue

                t1, t2 = i, j
                try:
                    t1_name = topic_model.get_topic_info(t1)["CustomName"][0]
                    t2_name = topic_model.get_topic_info(t2)["CustomName"][0]
                    print(
                        f"Merging topics {t1} ({t1_name}) and {t2} ({t2_name}) with similarity {sim:.2f}"
                    )
                    topic_model.merge_topics(reviews, topics_to_merge=[t1, t2])
                except Exception as e:
                    # Best effort: report the failure and try the next candidate pair
                    print(f"Failed to merge {t1} and {t2}: {e}")
                    continue

                # The merge succeeded, so refresh the labels and rescan from scratch
                topic_labels = topic_model.generate_topic_labels(
                    nr_words=3,
                    topic_prefix=True,
                    word_length=15,
                    separator=" - ",
                )
                topic_model.set_topic_labels(topic_labels)
                merged = True
                break
            if merged:
                break

        if not merged:
            print("No more topics to merge.")
            done = True

    # BERTopic.save(topic_model, "bertopic/model_reduced.bertopic")
elif USE_CONDENSED_MODEL:
    print("Nevermind, loading existing reduced model")
    topic_model = BERTopic.load("bertopic/model_reduced.bertopic")
else:
    print("Skipping topic reduction")
|
||||
|
||||
# %% [markdown]
|
||||
# ### Outlier Reduction
|
||||
#
|
||||
|
||||
# %%
|
||||
# Optionally reassign outlier documents using the model's stored topic
# probabilities, then refresh the topic representations with the new assignment.
if REDUCE_OUTLIERS:
    new_topics = topic_model.reduce_outliers(
        reviews,
        topic_model.topics_,
        probabilities=topic_model.probabilities_,
        threshold=0.05,
        strategy="probabilities",
    )
    # Recompute the topic representations for the reassigned documents
    topic_model.update_topics(reviews, topics=new_topics)
|
||||
|
||||
# %% [markdown]
|
||||
# ## Results
|
||||
#
|
||||
# ### Classification
|
||||
#
|
||||
|
||||
# %%
|
||||
import random
from pathlib import Path

# Export the original review texts of selected topics: one audit CSV plus
# batched plain-text corpus files, one or more per topic.

# --- config ---
topics_to_keep = {2, 4, 6, 8, 10, 5, 7}
INPUT_PATH = "../data/original/reviews.tab"  # TSV with a 'review' column
OUTPUT_CSV = "../data/intermediate/selected_topics_documents.csv"
OUTPUT_DIR = Path("../raft/corpus")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

BATCH_SIZE = 60  # maximum number of reviews per corpus file
MIN_CHARS = 40  # reviews shorter than this (after stripping) are dropped
SEED = 42

# --- load data ---
data = pd.read_csv(INPUT_PATH, sep="\t")

# If you already have `reviews` elsewhere, replace the next line with that variable
# Missing values are filled *before* the string cast: astype(str) turns NaN into
# the literal text "nan", after which fillna("") has nothing left to replace.
# NOTE(review): this rebinds `reviews` to the raw texts, so later cells that read
# `reviews` see this Series. It also keeps rows with missing reviews, whereas the
# earlier loading cell drops them - confirm the row count matches the fitted model.
reviews = data["review"].fillna("").astype(str)

# Topic model document info
df = topic_model.get_document_info(reviews)  # assumes your model is already fitted
df["Original"] = reviews.values

# --- filter by topics and length ---
filtered = df[df["Topic"].isin(topics_to_keep)].copy()
filtered["Original"] = filtered["Original"].str.strip()
filtered = filtered[filtered["Original"].str.len() >= MIN_CHARS]

# Save an audit CSV
filtered[["Original", "Topic"]].to_csv(OUTPUT_CSV, index=False)

# --- deterministic shuffle + write batched corpus files ---
total_files = 0
total_reviews = 0
rng = random.Random(SEED)

for topic_val, g in filtered.groupby("Topic", sort=True):
    reviews_list = g["Original"].tolist()

    # deterministic shuffle within topic (one seeded generator shared by all topics)
    rng.shuffle(reviews_list)

    # chunk into batches of up to BATCH_SIZE; range() never yields an empty slice
    for start in range(0, len(reviews_list), BATCH_SIZE):
        chunk = reviews_list[start : start + BATCH_SIZE]

        # simple header for traceability
        header = f"[TOPIC] {topic_val}\n[Stats] N={len(chunk)} | Source={INPUT_PATH}\n"

        lines = [header, ""]
        lines.extend(f"({i}) {txt}" for i, txt in enumerate(chunk, 1))

        part_idx = start // BATCH_SIZE + 1
        fname = f"topic={topic_val}__part={part_idx:03d}__n={len(chunk)}.txt"
        (OUTPUT_DIR / fname).write_text("\n".join(lines), encoding="utf-8")

        total_files += 1
        total_reviews += len(chunk)

# The builtin print does not interpret console markup tags, so plain text is emitted
print(f"Wrote {total_files} docs with {total_reviews} reviews to {OUTPUT_DIR}")
print(f"Filtered CSV saved to {OUTPUT_CSV}")
|
||||
|
||||
# %%
|
||||
# Document x topic probability table with the dominant topic per document.
doc_topic_matrix = probs

# Row and column labels are sized from the matrix itself. Deriving them from
# len(set(topics)) - 1 is off by one when no outlier topic exists, and
# len(reviews) can differ from the number of fitted documents once `reviews`
# has been rebound, so both could make the DataFrame construction fail.
n_docs, n_topics = np.shape(doc_topic_matrix)

# column names
topicnames = ["Topic " + str(i) for i in range(n_topics)]

# index names
docnames = ["Review " + str(i) for i in range(n_docs)]

# Make the pandas dataframe
df_document_topic = pd.DataFrame(
    np.round(doc_topic_matrix, 2), columns=topicnames, index=docnames
)

# Get dominant topic for each document
dominant_topic = np.argmax(doc_topic_matrix, axis=1)
df_document_topic["dominant_topic"] = dominant_topic
|
||||
|
||||
|
||||
# Styling
|
||||
def color_stuff(val):
    """Return the CSS color declaration for one table cell.

    Values above 0.1 are green, values above 0.05 are orange and everything
    else is grey.
    """
    shade = "green" if val > 0.1 else ("orange" if val > 0.05 else "grey")
    return f"color: {shade}"
|
||||
|
||||
|
||||
def make_bold(val):
    """Return the CSS font-weight declaration: 700 for values above 0.1, else 400."""
    if val > 0.1:
        return "font-weight: 700"
    return "font-weight: 400"
|
||||
|
||||
|
||||
# Apply Style
|
||||
df_document_topics = (
|
||||
df_document_topic.head(15).style.applymap(color_stuff).applymap(make_bold)
|
||||
)
|
||||
df_document_topics
|
||||
|
||||
# %% [markdown]
|
||||
# ### Document Visualization
|
||||
#
|
||||
|
||||
# %%
|
||||
vis = topic_model.visualize_documents(
|
||||
docs=reviews,
|
||||
reduced_embeddings=reduced_embeddings,
|
||||
custom_labels=True,
|
||||
hide_annotations=True,
|
||||
)
|
||||
vis.write_html("output/visualization.html")
|
||||
vis
|
||||
|
||||
# %% [markdown]
|
||||
# ### Similarity Matrix
|
||||
#
|
||||
|
||||
# %%
|
||||
topic_model.visualize_heatmap()
|
||||
|
||||
# %% [markdown]
|
||||
# ### Topic Info
|
||||
#
|
||||
|
||||
# %%
|
||||
topic_model.get_topic_info()
|
||||
|
||||
# %% [markdown]
|
||||
# ### Semantic Coherence
|
||||
#
|
||||
|
||||
# %%
|
||||
# Top words of every topic except the outlier topic (negative id)
topic_words = [
    [word for word, _ in topic_model.get_topic(topic_id)]
    for topic_id in topic_model.get_topic_info()["Topic"]
    if not topic_id < 0
]

# Compute mean pairwise cosine similarity for each topic
coherence_scores = []
for word_list in topic_words:
    word_vectors = embedding_model.encode(word_list)
    pairwise = cosine_similarity(word_vectors)

    # Ignore self-similarity; only the strict upper triangle is averaged
    np.fill_diagonal(pairwise, 0)
    upper_triangle = np.triu_indices(pairwise.shape[0], k=1)
    coherence_scores.append(np.mean(pairwise[upper_triangle]))

overall_coherence = np.mean(coherence_scores)

print(len(reviews), "reviews processed")
print(len(topic_model.get_topic_info()) - 1, "topics found")
print(f"BERT-based Topic Coherence: {overall_coherence:.4f}")
|
||||
|
||||
# %% [markdown]
|
||||
# ### Topic Coherence
|
||||
#
|
||||
|
||||
# %%
|
||||
# https://github.com/MaartenGr/BERTopic/issues/90#issuecomment-820915389

# Classical (gensim) coherence scores over the model's current topics.
if CALCULATE_COHERENCE:
    # Preprocess Documents
    # The model's current assignments are used rather than the `topics` returned
    # by fit_transform: merging and outlier reduction change the assignments, and
    # `topics` does not exist at all when the model was loaded from disk.
    documents = pd.DataFrame(
        {
            "Document": reviews,
            "ID": range(len(reviews)),
            "Topic": topic_model.topics_,
        }
    )
    documents_per_topic = documents.groupby(["Topic"], as_index=False).agg(
        {"Document": " ".join}
    )
    cleaned_docs = topic_model._preprocess_text(documents_per_topic.Document.values)

    # Extract vectorizer and analyzer from BERTopic
    vectorizer = topic_model.vectorizer_model
    analyzer = vectorizer.build_analyzer()

    # Extract features for Topic Coherence evaluation
    words = vectorizer.get_feature_names_out()
    tokens = [analyzer(doc) for doc in cleaned_docs]
    dictionary = corpora.Dictionary(tokens)
    corpus = [dictionary.doc2bow(token) for token in tokens]
    # Iterate over the topic ids that actually exist (outlier topic -1 excluded)
    # instead of assuming they are 0..k-1 for the k seen at fit time.
    topic_ids = sorted(
        topic_id for topic_id in topic_model.get_topics() if topic_id != -1
    )
    topic_words = [
        [word for word, _ in topic_model.get_topic(topic_id)]
        for topic_id in topic_ids
    ]

    # %env TOKENIZERS_PARALLELISM=false

    for measurement in ["c_v", "u_mass", "c_uci", "c_npmi"]:
        coherence_model = CoherenceModel(
            topics=topic_words,
            texts=tokens,
            corpus=corpus,
            dictionary=dictionary,
            coherence=measurement,
        )
        coherence_score = coherence_model.get_coherence()
        print(f"Coherence ({measurement}): {coherence_score:.4f}")
else:
    print("Skipping classical coherence calculation")
|
||||
|
||||
# %% [markdown]
|
||||
# ### Term Search
|
||||
#
|
||||
|
||||
# %%
|
||||
# Print the topics most similar to a search term, best match first.
search_term = "uluwatu"

similar_topics, similarities = topic_model.find_topics(search_term, top_n=10)
for topic_id, similarity in zip(similar_topics, similarities):
    # The label is looked up outside the f-string: re-using double quotes inside
    # a double-quoted f-string is a SyntaxError before Python 3.12.
    custom_name = topic_model.get_topic_info(topic_id)["CustomName"][0]
    print(f"{str(similarity)[:5]} {custom_name}")
|
||||
|
||||
# %% [markdown]
|
||||
# ### Topic Hierarchy
|
||||
#
|
||||
|
||||
# %%
|
||||
topic_model.visualize_hierarchy(custom_labels=True)
|
||||
|
||||
# %% [markdown]
|
||||
# ### Intertopic Distance Map
|
||||
#
|
||||
|
||||
# %%
|
||||
topic_model.visualize_topics()
|
||||
|
||||
# %% [markdown]
|
||||
# ### Topic Word Scores
|
||||
#
|
||||
|
||||
# %%
|
||||
topic_model.visualize_barchart(top_n_topics=12, custom_labels=True, n_words=10)
|
||||
@@ -70,8 +70,8 @@ MIN_SAMPLES = 15
|
||||
|
||||
# UMAP Parameters
|
||||
N_NEIGHBORS = 15
|
||||
N_COMPONENTS = 2
|
||||
MIN_DIST = 0.01
|
||||
N_COMPONENTS = 5
|
||||
MIN_DIST = 0.1
|
||||
|
||||
# Topic Modeling
|
||||
TOP_N_WORDS = 10
|
||||
|
||||
@@ -1,722 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
|
||||
<script src="https://cdn.jsdelivr.net/npm/leaflet@1.9.3/dist/leaflet.js"></script>
|
||||
<script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.2.2/dist/js/bootstrap.bundle.min.js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.js"></script>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/leaflet@1.9.3/dist/leaflet.css"/>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.2/dist/css/bootstrap.min.css"/>
|
||||
<link rel="stylesheet" href="https://netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap-glyphicons.css"/>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.2.0/css/all.min.css"/>
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.css"/>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/python-visualization/folium/folium/templates/leaflet.awesome.rotate.min.css"/>
|
||||
|
||||
<meta name="viewport" content="width=device-width,
|
||||
initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
|
||||
<style>
|
||||
#map_8827cd9e27b957cf12c465a4efd53c8e {
|
||||
position: relative;
|
||||
width: 100.0%;
|
||||
height: 100.0%;
|
||||
left: 0.0%;
|
||||
top: 0.0%;
|
||||
}
|
||||
.leaflet-container { font-size: 1rem; }
|
||||
</style>
|
||||
|
||||
<style>html, body {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>#map {
|
||||
position:absolute;
|
||||
top:0;
|
||||
bottom:0;
|
||||
right:0;
|
||||
left:0;
|
||||
}
|
||||
</style>
|
||||
|
||||
<script>
|
||||
L_NO_TOUCH = false;
|
||||
L_DISABLE_3D = false;
|
||||
</script>
|
||||
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
|
||||
<div class="folium-map" id="map_8827cd9e27b957cf12c465a4efd53c8e" ></div>
|
||||
|
||||
</body>
|
||||
<script>
|
||||
|
||||
|
||||
var map_8827cd9e27b957cf12c465a4efd53c8e = L.map(
|
||||
"map_8827cd9e27b957cf12c465a4efd53c8e",
|
||||
{
|
||||
center: [-8.45, 115.2],
|
||||
crs: L.CRS.EPSG3857,
|
||||
...{
|
||||
"zoom": 9,
|
||||
"zoomControl": true,
|
||||
"preferCanvas": false,
|
||||
"zoomSnap": 0.1,
|
||||
"zoomDelta": 0.1,
|
||||
}
|
||||
|
||||
}
|
||||
);
|
||||
L.control.scale().addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
var tile_layer_f4855f09fad51b54d44fb73a67dccf4e = L.tileLayer(
|
||||
"https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}{r}.png",
|
||||
{
|
||||
"minZoom": 0,
|
||||
"maxZoom": 18,
|
||||
"maxNativeZoom": 18,
|
||||
"noWrap": false,
|
||||
"attribution": "\u0026copy; \u003ca href=\"https://www.openstreetmap.org/copyright\"\u003eOpenStreetMap\u003c/a\u003e contributors \u0026copy; \u003ca href=\"https://carto.com/attributions\"\u003eCARTO\u003c/a\u003e",
|
||||
"subdomains": "abcd",
|
||||
"detectRetina": false,
|
||||
"tms": false,
|
||||
"opacity": 1,
|
||||
}
|
||||
|
||||
);
|
||||
|
||||
|
||||
tile_layer_f4855f09fad51b54d44fb73a67dccf4e.addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var circle_marker_5b4ae9dceb9c71755162320a031409f2 = L.circleMarker(
|
||||
[-8.5187511, 115.2585973],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_5b4ae9dceb9c71755162320a031409f2.bindTooltip(
|
||||
`<div>
|
||||
Sacred Monkey Forest
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_602eb000016a6b30ed7c72519753de07 = L.marker(
|
||||
[-8.5187511, 115.2585973],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_452f3f1faacc701744d7c02bacafef1b = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eSacred Monkey Forest\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_602eb000016a6b30ed7c72519753de07.setIcon(div_icon_452f3f1faacc701744d7c02bacafef1b);
|
||||
|
||||
|
||||
var circle_marker_2e56d660baf35eabcbfa98ff6e8d8d11 = L.circleMarker(
|
||||
[-8.8291432, 115.0849069],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_2e56d660baf35eabcbfa98ff6e8d8d11.bindTooltip(
|
||||
`<div>
|
||||
Uluwatu Temple
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_5dd8dbfb675ede190e11f0f7ca07c3bc = L.marker(
|
||||
[-8.8291432, 115.0849069],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_2648ca76c6782f2660a05bdde37e3616 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eUluwatu Temple\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_5dd8dbfb675ede190e11f0f7ca07c3bc.setIcon(div_icon_2648ca76c6782f2660a05bdde37e3616);
|
||||
|
||||
|
||||
var circle_marker_bb05fc2ce9b498a72f2d5403de4c057a = L.circleMarker(
|
||||
[-8.673889, 115.263611],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_bb05fc2ce9b498a72f2d5403de4c057a.bindTooltip(
|
||||
`<div>
|
||||
Sanur Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_ef590832f06fd20561b013b68756a271 = L.marker(
|
||||
[-8.673889, 115.263611],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_6c27875889040e5114bd58b6dd78d565 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eSanur Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_ef590832f06fd20561b013b68756a271.setIcon(div_icon_6c27875889040e5114bd58b6dd78d565);
|
||||
|
||||
|
||||
var circle_marker_238718621a21030747436a452bfb3299 = L.circleMarker(
|
||||
[-8.618786, 115.086733],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_238718621a21030747436a452bfb3299.bindTooltip(
|
||||
`<div>
|
||||
Tanah Lot Temple
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_ae5f715c478f42e3f143541f3234b0f9 = L.marker(
|
||||
[-8.618786, 115.086733],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_66d943b7af7c007ae0e4b8134ca4900f = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eTanah Lot Temple\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_ae5f715c478f42e3f143541f3234b0f9.setIcon(div_icon_66d943b7af7c007ae0e4b8134ca4900f);
|
||||
|
||||
|
||||
var circle_marker_8771a4fca9bbd4915b07cc2700c5e89e = L.circleMarker(
|
||||
[-8.6925, 115.158611],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_8771a4fca9bbd4915b07cc2700c5e89e.bindTooltip(
|
||||
`<div>
|
||||
Seminyak Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_6bb0332dd2f02d55130e014b19bffefe = L.marker(
|
||||
[-8.6925, 115.158611],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_9a4f199406a6917c3729d735293beec4 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eSeminyak Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_6bb0332dd2f02d55130e014b19bffefe.setIcon(div_icon_9a4f199406a6917c3729d735293beec4);
|
||||
|
||||
|
||||
var circle_marker_51e42098d14cee4d8bbba1e8de44cb1a = L.circleMarker(
|
||||
[-8.791918, 115.225375],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_51e42098d14cee4d8bbba1e8de44cb1a.bindTooltip(
|
||||
`<div>
|
||||
Nusa Dua
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_6db92ef3d1d15b93e2f8951453121e0e = L.marker(
|
||||
[-8.791918, 115.225375],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_3a87774e80c4c355e408bb97f02e9e04 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eNusa Dua\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, -8],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_6db92ef3d1d15b93e2f8951453121e0e.setIcon(div_icon_3a87774e80c4c355e408bb97f02e9e04);
|
||||
|
||||
|
||||
var circle_marker_d43c0263ab8f5111318f226a7ebd0a1a = L.circleMarker(
|
||||
[-8.59128, 115.26456],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_d43c0263ab8f5111318f226a7ebd0a1a.bindTooltip(
|
||||
`<div>
|
||||
Bali Zoo
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_045f45d15d9bb0bf3544ec15c15e72ca = L.marker(
|
||||
[-8.59128, 115.26456],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_17abbfa0aa47dc5e2b90a3f3ed4031a5 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eBali Zoo\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_045f45d15d9bb0bf3544ec15c15e72ca.setIcon(div_icon_17abbfa0aa47dc5e2b90a3f3ed4031a5);
|
||||
|
||||
|
||||
var circle_marker_a7d61c5f9e133c503602ce1a176641d0 = L.circleMarker(
|
||||
[-8.23889, 115.3775],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_a7d61c5f9e133c503602ce1a176641d0.bindTooltip(
|
||||
`<div>
|
||||
Mount Batur
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_4158f6f747343e4e3a34a6decc5862c6 = L.marker(
|
||||
[-8.23889, 115.3775],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_a68ad209a222c1c6d07276e7c80e8d1c = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eMount Batur\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_4158f6f747343e4e3a34a6decc5862c6.setIcon(div_icon_a68ad209a222c1c6d07276e7c80e8d1c);
|
||||
|
||||
|
||||
var circle_marker_aed36500c42e8fc9bf3376b0e1bb2ed9 = L.circleMarker(
|
||||
[-8.275177, 115.1668487],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_aed36500c42e8fc9bf3376b0e1bb2ed9.bindTooltip(
|
||||
`<div>
|
||||
Ulun Danu Bratan
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_22a12c5d4517fbdbba1d7e4b93716e8b = L.marker(
|
||||
[-8.275177, 115.1668487],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_6ba395bc4ffc650104f4c3b4b96fa477 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eUlun Danu Bratan\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_22a12c5d4517fbdbba1d7e4b93716e8b.setIcon(div_icon_6ba395bc4ffc650104f4c3b4b96fa477);
|
||||
|
||||
|
||||
var circle_marker_9e78cc21d0b245c95b3a65818241d6b1 = L.circleMarker(
|
||||
[-8.411944, 115.5875],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_9e78cc21d0b245c95b3a65818241d6b1.bindTooltip(
|
||||
`<div>
|
||||
Tirta Gangga
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_acadfa63b305a6930490ce129db70d3c = L.marker(
|
||||
[-8.411944, 115.5875],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_6701732f8753d0cf3dd086583f966d47 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eTirta Gangga\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_acadfa63b305a6930490ce129db70d3c.setIcon(div_icon_6701732f8753d0cf3dd086583f966d47);
|
||||
|
||||
|
||||
var circle_marker_2bfd51976f3bff708534a582e4c0bf07 = L.circleMarker(
|
||||
[-8.84586, 115.18417],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_2bfd51976f3bff708534a582e4c0bf07.bindTooltip(
|
||||
`<div>
|
||||
Pandawa Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_ed85f748464576595c1995b90bd453ef = L.marker(
|
||||
[-8.84586, 115.18417],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_760441791416950ed05bce0760e785b3 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003ePandawa Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_ed85f748464576595c1995b90bd453ef.setIcon(div_icon_760441791416950ed05bce0760e785b3);
|
||||
|
||||
|
||||
var circle_marker_7905afef37932aa1ee010c0afc07b0e1 = L.circleMarker(
|
||||
[-8.79093, 115.16006],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_7905afef37932aa1ee010c0afc07b0e1.bindTooltip(
|
||||
`<div>
|
||||
Jimbaran Bay
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_b2515a0a726a9b31bb1349a731e14e83 = L.marker(
|
||||
[-8.79093, 115.16006],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_66b39b3aaa2ce168a11eb1e5842c4af5 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eJimbaran Bay\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_b2515a0a726a9b31bb1349a731e14e83.setIcon(div_icon_66b39b3aaa2ce168a11eb1e5842c4af5);
|
||||
|
||||
|
||||
var circle_marker_2e4a4da4c607525d0bd3ced67f91ba28 = L.circleMarker(
|
||||
[-8.6975074, 115.1610332],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_2e4a4da4c607525d0bd3ced67f91ba28.bindTooltip(
|
||||
`<div>
|
||||
Double Six Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_610df1ccee05f9940b5331a8c95b1ecb = L.marker(
|
||||
[-8.6975074, 115.1610332],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_ca934069aed3a67e09cd3417a4f13721 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eDouble Six Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, -8],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_610df1ccee05f9940b5331a8c95b1ecb.setIcon(div_icon_ca934069aed3a67e09cd3417a4f13721);
|
||||
|
||||
|
||||
var circle_marker_6df0392885bf12f353c499f20e4408e4 = L.circleMarker(
|
||||
[-8.690565, 115.4302884],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_6df0392885bf12f353c499f20e4408e4.bindTooltip(
|
||||
`<div>
|
||||
Devil Tears
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_cadbe0b40f9ed26e08e22f0c239a31ee = L.marker(
|
||||
[-8.690565, 115.4302884],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_7b530133b508d4cc268be38f800e05a6 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eDevil Tears\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_cadbe0b40f9ed26e08e22f0c239a31ee.setIcon(div_icon_7b530133b508d4cc268be38f800e05a6);
|
||||
|
||||
|
||||
var circle_marker_fa698e9847acafbbf4b5516fc8471f66 = L.circleMarker(
|
||||
[-8.750644, 115.474693],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_fa698e9847acafbbf4b5516fc8471f66.bindTooltip(
|
||||
`<div>
|
||||
Kelingking Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_2313407fb3b0e9bf2b11e9e793e558bf = L.marker(
|
||||
[-8.750644, 115.474693],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_cbcfa736ca9fc77147f3a561fff80c16 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eKelingking Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_2313407fb3b0e9bf2b11e9e793e558bf.setIcon(div_icon_cbcfa736ca9fc77147f3a561fff80c16);
|
||||
|
||||
|
||||
var circle_marker_47bc40126cf9256b5447c4e1983393ce = L.circleMarker(
|
||||
[-8.395195, 115.647885],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_47bc40126cf9256b5447c4e1983393ce.bindTooltip(
|
||||
`<div>
|
||||
Lempuyang Temple
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_7bb290b54979c3fed12bbe3ab8dd7b69 = L.marker(
|
||||
[-8.395195, 115.647885],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_5a34c539b7720057973544f25ff2c779 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eLempuyang Temple\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_7bb290b54979c3fed12bbe3ab8dd7b69.setIcon(div_icon_5a34c539b7720057973544f25ff2c779);
|
||||
|
||||
|
||||
var circle_marker_1a8ec5245976c9d8de699ed61d02ba8f = L.circleMarker(
|
||||
[-8.639877, 115.140172],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_1a8ec5245976c9d8de699ed61d02ba8f.bindTooltip(
|
||||
`<div>
|
||||
Canggu Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_f439782dac43c98e72b2ee679dcd6acf = L.marker(
|
||||
[-8.639877, 115.140172],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_9c3d9bf434778a4e3b4c9e756f6f8a22 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eCanggu Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_f439782dac43c98e72b2ee679dcd6acf.setIcon(div_icon_9c3d9bf434778a4e3b4c9e756f6f8a22);
|
||||
|
||||
|
||||
var circle_marker_ec714608b52782227236e4b16fc3de53 = L.circleMarker(
|
||||
[-8.340686, 115.503622],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_ec714608b52782227236e4b16fc3de53.bindTooltip(
|
||||
`<div>
|
||||
Mount Agung
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_0a2b278b113476c9568e4a0cb1815202 = L.marker(
|
||||
[-8.340686, 115.503622],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_c36cada9c49b18e2afaed6243a4426f1 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eMount Agung\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_0a2b278b113476c9568e4a0cb1815202.setIcon(div_icon_c36cada9c49b18e2afaed6243a4426f1);
|
||||
|
||||
|
||||
map_8827cd9e27b957cf12c465a4efd53c8e.fitBounds(
|
||||
[[-8.85086, 115.0799069], [-8.233889999999999, 115.652885]],
|
||||
{}
|
||||
);
|
||||
|
||||
</script>
|
||||
</html>
|
||||
@@ -1,116 +0,0 @@
|
||||
# bali_map.py
|
||||
# Creates an interactive HTML map of Bali (and nearby islands) with readable, always-visible labels.
|
||||
|
||||
import folium
|
||||
|
||||
# Destinations to plot, keyed by display name. Each value is a
# (latitude, longitude) pair; the name doubles as the on-map label text.
DESTINATIONS = {
    "Sacred Monkey Forest": (-8.5187511, 115.2585973),
    "Uluwatu Temple": (-8.8291432, 115.0849069),
    "Sanur Beach": (-8.673889, 115.263611),
    "Tanah Lot Temple": (-8.618786, 115.086733),
    "Seminyak Beach": (-8.6925, 115.158611),
    "Nusa Dua": (-8.791918, 115.225375),
    "Bali Zoo": (-8.59128, 115.26456),
    "Mount Batur": (-8.23889, 115.37750),
    "Ulun Danu Bratan": (-8.275177, 115.1668487),
    "Tirta Gangga": (-8.411944, 115.5875),
    "Pandawa Beach": (-8.84586, 115.18417),
    "Jimbaran Bay": (-8.79093, 115.16006),
    "Double Six Beach": (-8.6975074, 115.1610332),
    "Devil Tears": (-8.6905650, 115.4302884),
    "Kelingking Beach": (-8.750644, 115.474693),
    "Lempuyang Temple": (-8.395195, 115.647885),
    "Canggu Beach": (-8.639877, 115.140172),
    "Mount Agung": (-8.340686, 115.503622),
}
|
||||
|
||||
# --- Base map ---
# Module-level map object that every marker below is attached to. The
# initial centre/zoom are only a starting view; zoom snapping and zoom
# steps are both set to 0.1.
m = folium.Map(
    location=(-8.45, 115.20),
    zoom_start=9,
    tiles="CartoDB positron",
    control_scale=True,
    zoom_snap=0.1,
    zoom_delta=0.1,
    max_zoom=18,
)
|
||||
|
||||
# --- Label appearance ---
# Inline CSS placed in the style attribute of each label's <div>, so the
# text is readable and stays on a single line.
LABEL_STYLE = """
padding: 3px 6px;
font-size: 16px;
font-weight: 600;
color: #111;
white-space: nowrap;
"""

# Optional per-destination nudges as (x, y) pixel pairs, keyed by
# destination name. A positive y pushes the label further down.
LABEL_OFFSETS = {
    "Nusa Dua": (0, 20),
    "Double Six Beach": (0, 20),
}
|
||||
|
||||
|
||||
def add_point_with_label(name: str, lat: float, lon: float):
    """Draw one destination on the module-level map ``m``.

    Two layers are added at ``(lat, lon)``: a small filled circle marking the
    exact coordinate (with ``name`` as its hover tooltip), and a text-only
    ``DivIcon`` marker that shows ``name`` next to the dot.

    Args:
        name: Text used for the tooltip and the label; also the key used to
            look up an optional nudge in ``LABEL_OFFSETS``.
        lat: Latitude of the point.
        lon: Longitude of the point.
    """
    position = (lat, lon)

    # Dot at the exact coordinate; the tooltip is still useful on hover.
    dot = folium.CircleMarker(
        location=position,
        radius=4,
        weight=2,
        fill=True,
        fill_opacity=1.0,
        tooltip=name,
    )
    dot.add_to(m)

    # The base anchor keeps the text from sitting directly on the dot. A
    # per-label nudge is added to the anchor's x and subtracted from its y.
    nudge_x, nudge_y = LABEL_OFFSETS.get(name, (0, 0))
    anchor = (-8 + nudge_x, 12 - nudge_y)

    label_icon = folium.DivIcon(
        icon_size=(1, 1),
        icon_anchor=anchor,
        html=f'<div style="{LABEL_STYLE}">{name}</div>',
    )
    folium.Marker(location=position, icon=label_icon).add_to(m)
|
||||
|
||||
|
||||
# Plot every destination (dot + label).
for name, (lat, lon) in DESTINATIONS.items():
    add_point_with_label(name, lat, lon)

# Gather the coordinates so the view can be framed around all of them.
lats = [point[0] for point in DESTINATIONS.values()]
lons = [point[1] for point in DESTINATIONS.values()]

# Fit the view to the bounding box of all points (so Nusa Penida /
# Lembongan are included as well), widened by a small margin on every side.
pad = 0.005
south_west = [min(lats) - pad, min(lons) - pad]
north_east = [max(lats) + pad, max(lons) + pad]
m.fit_bounds([south_west, north_east])

# Write the map out as an HTML file.
out_file = "bali_destinations_labeled.html"
m.save(out_file)
print(f"Saved: {out_file}")
|
||||
@@ -1,114 +0,0 @@
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
def load_json_data(file_path):
    """
    Load and validate label/value data from a JSON file.

    Expected format:
    {
        "label1": value1,
        "label2": value2,
        ...
    }

    Args:
        file_path: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        The parsed dict mapping string labels to int/float values.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the top-level JSON value is not an object, or if any
            value is not a number. Booleans are rejected. Malformed JSON
            surfaces as ``json.JSONDecodeError``, a ``ValueError`` subclass.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, dict):
        raise ValueError(
            "JSON must be an object with key-value pairs (labels: values)."
        )

    for key, value in data.items():
        if not isinstance(key, str):
            raise ValueError("All keys must be strings (labels).")
        # bool is a subclass of int, so JSON true/false would slip through a
        # plain isinstance(value, (int, float)) check; reject it explicitly.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise ValueError("All values must be numeric (int or float).")

    return data
|
||||
|
||||
|
||||
def create_bar_graph(
    data, title="Bar Graph", x_label="Labels", y_label="Values", output=None
):
    """
    Create a bar graph from a dictionary of data.

    Args:
        data: Mapping of bar label -> bar height. Bars are drawn in the
            mapping's iteration order.
        title: Title placed above the plot.
        x_label: Caption for the x-axis.
        y_label: Caption for the y-axis.
        output: File path to save the figure to. When falsy, the figure is
            displayed with ``plt.show()`` instead of being saved.
    """
    labels = list(data.keys())
    values = list(data.values())

    fig = plt.figure(figsize=(10, 6))
    plt.bar(labels, values)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.xticks(rotation=45)
    plt.tight_layout()

    if output:
        try:
            plt.savefig(output)
        finally:
            # pyplot keeps a figure registered until it is closed explicitly.
            # Release it once written (or on failure) so repeated calls do
            # not accumulate open figures.
            plt.close(fig)
        print(f"Graph saved to: {output}")
    else:
        plt.show()
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the bar-graph generator.

    Parses the JSON path and the optional title / axis-label / output
    arguments, loads the data, and renders the graph. Any exception raised
    while loading or plotting is reported on stderr and the process exits
    with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Generate a bar graph from a JSON file containing key-value pairs."
    )
    parser.add_argument(
        "json_path",
        type=str,
        help="Path to the JSON file (e.g., data.json)",
    )

    # (flag, default, help) for each optional string argument, registered in
    # this order.
    optional_flags = (
        ("--title", "Bar Graph", "Title of the bar graph"),
        ("--x_label", "Labels", "Label for the x-axis"),
        ("--y_label", "Values", "Label for the y-axis"),
        (
            "--output",
            None,
            "Optional output file path (e.g., graph.png). If not provided, the graph will be displayed.",
        ),
    )
    for flag, default, help_text in optional_flags:
        parser.add_argument(flag, type=str, default=default, help=help_text)

    args = parser.parse_args()

    try:
        data = load_json_data(args.json_path)
        create_bar_graph(
            data,
            title=args.title,
            x_label=args.x_label,
            y_label=args.y_label,
            output=args.output,
        )
    except Exception as e:
        # Top-level boundary: turn any failure into a message and exit code.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,3 +0,0 @@
|
||||
matplotlib
|
||||
folium
|
||||
pandas
|
||||
@@ -1,101 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Read a .tab (TSV) file with a single column named 'review'.
|
||||
1) Print number of rows
|
||||
2) Drop exact duplicate reviews and print count again
|
||||
3) Build JSON describing the distribution of review length (in words) for remaining reviews
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def word_count(text: str) -> int:
    """Return the number of whitespace-separated words in ``text``.

    Values that are not ``str`` (for example NaN or ``None``) count as 0
    words, as do empty and whitespace-only strings.
    """
    if isinstance(text, str):
        # split() with no separator ignores leading/trailing whitespace and
        # yields an empty list for blank input, so no explicit strip is needed.
        return len(text.split())
    return 0
|
||||
|
||||
|
||||
def main() -> int:
    """Run the review-length analysis from the command line.

    Reads the TSV given as ``input_tab``, prints the row count before and
    after dropping exact duplicate reviews, and writes a JSON report (word
    count histogram plus min/max/mean/median) to the ``--out`` path.

    Returns:
        0 on success; 1 if the input file does not exist or has no
        'review' column.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "input_tab", help="Path to .tab/.tsv file with a 'review' column"
    )
    parser.add_argument(
        "--out",
        default="review_length_distribution.json",
        help="Output JSON path (default: review_length_distribution.json)",
    )
    args = parser.parse_args()

    in_path = Path(args.input_tab)
    if not in_path.exists():
        print(f"ERROR: file not found: {in_path}", file=sys.stderr)
        return 1

    # Read everything as strings; keep_default_na=False keeps empty fields
    # as "" instead of converting them to NaN.
    df = pd.read_csv(in_path, sep="\t", dtype=str, keep_default_na=False)

    if "review" not in df.columns:
        print(
            f"ERROR: expected a column named 'review'. Found: {list(df.columns)}",
            file=sys.stderr,
        )
        return 1

    n_before = len(df)
    print(f"Rows before dedup: {n_before}")

    # Exact-match dedup on the full review string; the first occurrence wins.
    # (Strip the column beforehand if leading/trailing spaces should be ignored.)
    unique_reviews = df.drop_duplicates(subset=["review"], keep="first")
    unique_reviews = unique_reviews.reset_index(drop=True)

    n_after = len(unique_reviews)
    print(f"Rows after dedup: {n_after}")

    # Word count per remaining review, then histogram: word_count -> reviews.
    lengths = unique_reviews["review"].map(word_count)
    histogram = Counter(lengths.tolist())

    if len(lengths):
        summary = {
            "min_words": int(lengths.min()),
            "max_words": int(lengths.max()),
            "mean_words": float(lengths.mean()),
            "median_words": float(lengths.median()),
        }
    else:
        # No rows left: report zeros rather than NaN.
        summary = {
            "min_words": 0,
            "max_words": 0,
            "mean_words": 0.0,
            "median_words": 0.0,
        }

    result = {
        "file": str(in_path),
        "rows_before_dedup": n_before,
        "rows_after_dedup": n_after,
        # JSON keys must be strings; emit them in ascending numeric order.
        "distribution_word_length": {
            str(words): histogram[words] for words in sorted(histogram, key=int)
        },
        "summary": summary,
    }

    out_path = Path(args.out)
    report = json.dumps(result, ensure_ascii=False, indent=2)
    out_path.write_text(report, encoding="utf-8")
    print(f"Wrote JSON: {out_path}")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -1,604 +0,0 @@
|
||||
{
|
||||
"file": "../data/original/reviews.tab",
|
||||
"rows_before_dedup": 56446,
|
||||
"rows_after_dedup": 55662,
|
||||
"distribution_word_length": {
|
||||
"8": 1,
|
||||
"9": 5,
|
||||
"10": 10,
|
||||
"11": 14,
|
||||
"12": 20,
|
||||
"13": 29,
|
||||
"14": 37,
|
||||
"15": 92,
|
||||
"16": 163,
|
||||
"17": 308,
|
||||
"18": 482,
|
||||
"19": 728,
|
||||
"20": 859,
|
||||
"21": 977,
|
||||
"22": 944,
|
||||
"23": 989,
|
||||
"24": 937,
|
||||
"25": 1032,
|
||||
"26": 946,
|
||||
"27": 927,
|
||||
"28": 928,
|
||||
"29": 920,
|
||||
"30": 926,
|
||||
"31": 879,
|
||||
"32": 897,
|
||||
"33": 856,
|
||||
"34": 759,
|
||||
"35": 829,
|
||||
"36": 774,
|
||||
"37": 708,
|
||||
"38": 771,
|
||||
"39": 717,
|
||||
"40": 693,
|
||||
"41": 737,
|
||||
"42": 734,
|
||||
"43": 655,
|
||||
"44": 616,
|
||||
"45": 630,
|
||||
"46": 680,
|
||||
"47": 609,
|
||||
"48": 588,
|
||||
"49": 586,
|
||||
"50": 598,
|
||||
"51": 562,
|
||||
"52": 543,
|
||||
"53": 563,
|
||||
"54": 549,
|
||||
"55": 551,
|
||||
"56": 478,
|
||||
"57": 522,
|
||||
"58": 450,
|
||||
"59": 515,
|
||||
"60": 509,
|
||||
"61": 461,
|
||||
"62": 453,
|
||||
"63": 451,
|
||||
"64": 483,
|
||||
"65": 403,
|
||||
"66": 442,
|
||||
"67": 404,
|
||||
"68": 418,
|
||||
"69": 389,
|
||||
"70": 394,
|
||||
"71": 355,
|
||||
"72": 357,
|
||||
"73": 389,
|
||||
"74": 360,
|
||||
"75": 356,
|
||||
"76": 338,
|
||||
"77": 330,
|
||||
"78": 308,
|
||||
"79": 327,
|
||||
"80": 303,
|
||||
"81": 302,
|
||||
"82": 306,
|
||||
"83": 273,
|
||||
"84": 276,
|
||||
"85": 265,
|
||||
"86": 268,
|
||||
"87": 263,
|
||||
"88": 264,
|
||||
"89": 229,
|
||||
"90": 244,
|
||||
"91": 239,
|
||||
"92": 212,
|
||||
"93": 267,
|
||||
"94": 211,
|
||||
"95": 226,
|
||||
"96": 247,
|
||||
"97": 219,
|
||||
"98": 239,
|
||||
"99": 201,
|
||||
"100": 220,
|
||||
"101": 213,
|
||||
"102": 180,
|
||||
"103": 194,
|
||||
"104": 204,
|
||||
"105": 201,
|
||||
"106": 200,
|
||||
"107": 149,
|
||||
"108": 189,
|
||||
"109": 196,
|
||||
"110": 178,
|
||||
"111": 140,
|
||||
"112": 157,
|
||||
"113": 150,
|
||||
"114": 160,
|
||||
"115": 130,
|
||||
"116": 151,
|
||||
"117": 159,
|
||||
"118": 151,
|
||||
"119": 118,
|
||||
"120": 138,
|
||||
"121": 115,
|
||||
"122": 107,
|
||||
"123": 121,
|
||||
"124": 99,
|
||||
"125": 135,
|
||||
"126": 126,
|
||||
"127": 125,
|
||||
"128": 97,
|
||||
"129": 99,
|
||||
"130": 95,
|
||||
"131": 92,
|
||||
"132": 86,
|
||||
"133": 108,
|
||||
"134": 115,
|
||||
"135": 101,
|
||||
"136": 101,
|
||||
"137": 103,
|
||||
"138": 91,
|
||||
"139": 81,
|
||||
"140": 92,
|
||||
"141": 91,
|
||||
"142": 95,
|
||||
"143": 76,
|
||||
"144": 84,
|
||||
"145": 91,
|
||||
"146": 84,
|
||||
"147": 87,
|
||||
"148": 92,
|
||||
"149": 73,
|
||||
"150": 78,
|
||||
"151": 71,
|
||||
"152": 76,
|
||||
"153": 87,
|
||||
"154": 60,
|
||||
"155": 67,
|
||||
"156": 67,
|
||||
"157": 88,
|
||||
"158": 56,
|
||||
"159": 66,
|
||||
"160": 41,
|
||||
"161": 56,
|
||||
"162": 61,
|
||||
"163": 68,
|
||||
"164": 62,
|
||||
"165": 67,
|
||||
"166": 52,
|
||||
"167": 62,
|
||||
"168": 47,
|
||||
"169": 41,
|
||||
"170": 49,
|
||||
"171": 47,
|
||||
"172": 43,
|
||||
"173": 39,
|
||||
"174": 61,
|
||||
"175": 56,
|
||||
"176": 55,
|
||||
"177": 47,
|
||||
"178": 34,
|
||||
"179": 44,
|
||||
"180": 43,
|
||||
"181": 37,
|
||||
"182": 48,
|
||||
"183": 47,
|
||||
"184": 39,
|
||||
"185": 38,
|
||||
"186": 42,
|
||||
"187": 42,
|
||||
"188": 35,
|
||||
"189": 43,
|
||||
"190": 39,
|
||||
"191": 38,
|
||||
"192": 37,
|
||||
"193": 27,
|
||||
"194": 28,
|
||||
"195": 40,
|
||||
"196": 33,
|
||||
"197": 36,
|
||||
"198": 40,
|
||||
"199": 35,
|
||||
"200": 30,
|
||||
"201": 28,
|
||||
"202": 28,
|
||||
"203": 26,
|
||||
"204": 28,
|
||||
"205": 32,
|
||||
"206": 31,
|
||||
"207": 36,
|
||||
"208": 36,
|
||||
"209": 24,
|
||||
"210": 20,
|
||||
"211": 34,
|
||||
"212": 26,
|
||||
"213": 31,
|
||||
"214": 27,
|
||||
"215": 25,
|
||||
"216": 23,
|
||||
"217": 26,
|
||||
"218": 20,
|
||||
"219": 20,
|
||||
"220": 20,
|
||||
"221": 28,
|
||||
"222": 15,
|
||||
"223": 18,
|
||||
"224": 17,
|
||||
"225": 22,
|
||||
"226": 16,
|
||||
"227": 29,
|
||||
"228": 27,
|
||||
"229": 23,
|
||||
"230": 14,
|
||||
"231": 23,
|
||||
"232": 22,
|
||||
"233": 21,
|
||||
"234": 23,
|
||||
"235": 16,
|
||||
"236": 18,
|
||||
"237": 14,
|
||||
"238": 11,
|
||||
"239": 17,
|
||||
"240": 8,
|
||||
"241": 16,
|
||||
"242": 12,
|
||||
"243": 18,
|
||||
"244": 15,
|
||||
"245": 11,
|
||||
"246": 24,
|
||||
"247": 14,
|
||||
"248": 18,
|
||||
"249": 15,
|
||||
"250": 11,
|
||||
"251": 17,
|
||||
"252": 17,
|
||||
"253": 15,
|
||||
"254": 17,
|
||||
"255": 18,
|
||||
"256": 14,
|
||||
"257": 21,
|
||||
"258": 13,
|
||||
"259": 16,
|
||||
"260": 10,
|
||||
"261": 20,
|
||||
"262": 8,
|
||||
"263": 9,
|
||||
"264": 11,
|
||||
"265": 16,
|
||||
"266": 6,
|
||||
"267": 14,
|
||||
"268": 14,
|
||||
"269": 12,
|
||||
"270": 11,
|
||||
"271": 12,
|
||||
"272": 9,
|
||||
"273": 5,
|
||||
"274": 7,
|
||||
"275": 4,
|
||||
"276": 6,
|
||||
"277": 10,
|
||||
"278": 11,
|
||||
"279": 13,
|
||||
"280": 7,
|
||||
"281": 9,
|
||||
"282": 6,
|
||||
"283": 9,
|
||||
"284": 10,
|
||||
"285": 9,
|
||||
"286": 11,
|
||||
"287": 8,
|
||||
"288": 5,
|
||||
"289": 6,
|
||||
"290": 8,
|
||||
"291": 4,
|
||||
"292": 11,
|
||||
"293": 6,
|
||||
"294": 11,
|
||||
"295": 11,
|
||||
"296": 7,
|
||||
"297": 4,
|
||||
"298": 7,
|
||||
"299": 13,
|
||||
"300": 7,
|
||||
"301": 15,
|
||||
"302": 10,
|
||||
"303": 7,
|
||||
"304": 11,
|
||||
"305": 3,
|
||||
"306": 7,
|
||||
"307": 8,
|
||||
"308": 6,
|
||||
"309": 4,
|
||||
"310": 7,
|
||||
"311": 4,
|
||||
"312": 8,
|
||||
"313": 5,
|
||||
"314": 1,
|
||||
"315": 8,
|
||||
"316": 8,
|
||||
"317": 9,
|
||||
"318": 8,
|
||||
"319": 6,
|
||||
"320": 8,
|
||||
"321": 2,
|
||||
"322": 8,
|
||||
"323": 6,
|
||||
"324": 9,
|
||||
"325": 6,
|
||||
"326": 8,
|
||||
"327": 3,
|
||||
"328": 8,
|
||||
"329": 7,
|
||||
"330": 5,
|
||||
"331": 8,
|
||||
"332": 7,
|
||||
"333": 2,
|
||||
"334": 1,
|
||||
"335": 9,
|
||||
"336": 4,
|
||||
"337": 6,
|
||||
"338": 4,
|
||||
"339": 3,
|
||||
"340": 6,
|
||||
"341": 5,
|
||||
"342": 3,
|
||||
"343": 4,
|
||||
"344": 3,
|
||||
"345": 5,
|
||||
"346": 3,
|
||||
"347": 5,
|
||||
"348": 3,
|
||||
"349": 3,
|
||||
"350": 3,
|
||||
"351": 2,
|
||||
"352": 8,
|
||||
"353": 4,
|
||||
"354": 4,
|
||||
"355": 4,
|
||||
"356": 3,
|
||||
"357": 4,
|
||||
"358": 3,
|
||||
"359": 3,
|
||||
"360": 8,
|
||||
"361": 6,
|
||||
"362": 5,
|
||||
"363": 8,
|
||||
"364": 4,
|
||||
"365": 6,
|
||||
"366": 3,
|
||||
"367": 7,
|
||||
"368": 4,
|
||||
"369": 8,
|
||||
"370": 2,
|
||||
"371": 2,
|
||||
"372": 7,
|
||||
"373": 5,
|
||||
"374": 4,
|
||||
"375": 1,
|
||||
"376": 1,
|
||||
"377": 3,
|
||||
"378": 1,
|
||||
"379": 2,
|
||||
"380": 2,
|
||||
"381": 2,
|
||||
"382": 3,
|
||||
"383": 2,
|
||||
"384": 1,
|
||||
"385": 1,
|
||||
"386": 2,
|
||||
"387": 4,
|
||||
"388": 6,
|
||||
"389": 4,
|
||||
"390": 4,
|
||||
"391": 3,
|
||||
"392": 3,
|
||||
"393": 2,
|
||||
"394": 2,
|
||||
"395": 7,
|
||||
"396": 6,
|
||||
"397": 2,
|
||||
"398": 2,
|
||||
"401": 1,
|
||||
"402": 5,
|
||||
"403": 1,
|
||||
"404": 3,
|
||||
"405": 4,
|
||||
"406": 1,
|
||||
"407": 1,
|
||||
"409": 3,
|
||||
"410": 2,
|
||||
"411": 1,
|
||||
"412": 1,
|
||||
"413": 2,
|
||||
"414": 3,
|
||||
"415": 4,
|
||||
"416": 2,
|
||||
"417": 2,
|
||||
"418": 3,
|
||||
"419": 1,
|
||||
"420": 2,
|
||||
"421": 4,
|
||||
"422": 1,
|
||||
"424": 3,
|
||||
"425": 4,
|
||||
"426": 4,
|
||||
"427": 1,
|
||||
"428": 1,
|
||||
"429": 2,
|
||||
"430": 2,
|
||||
"431": 4,
|
||||
"433": 1,
|
||||
"434": 1,
|
||||
"436": 1,
|
||||
"437": 1,
|
||||
"438": 5,
|
||||
"439": 1,
|
||||
"440": 2,
|
||||
"441": 1,
|
||||
"443": 4,
|
||||
"444": 3,
|
||||
"445": 1,
|
||||
"446": 5,
|
||||
"448": 1,
|
||||
"449": 4,
|
||||
"451": 2,
|
||||
"452": 1,
|
||||
"455": 3,
|
||||
"456": 1,
|
||||
"457": 1,
|
||||
"458": 1,
|
||||
"459": 1,
|
||||
"463": 2,
|
||||
"464": 1,
|
||||
"465": 2,
|
||||
"466": 2,
|
||||
"467": 2,
|
||||
"469": 1,
|
||||
"470": 1,
|
||||
"474": 1,
|
||||
"475": 5,
|
||||
"476": 1,
|
||||
"477": 1,
|
||||
"478": 1,
|
||||
"479": 3,
|
||||
"481": 1,
|
||||
"482": 1,
|
||||
"484": 1,
|
||||
"485": 2,
|
||||
"489": 1,
|
||||
"490": 1,
|
||||
"494": 3,
|
||||
"495": 1,
|
||||
"497": 1,
|
||||
"499": 1,
|
||||
"501": 1,
|
||||
"502": 1,
|
||||
"503": 1,
|
||||
"504": 1,
|
||||
"505": 1,
|
||||
"506": 1,
|
||||
"508": 3,
|
||||
"510": 2,
|
||||
"511": 4,
|
||||
"518": 1,
|
||||
"519": 2,
|
||||
"520": 1,
|
||||
"522": 1,
|
||||
"523": 1,
|
||||
"524": 1,
|
||||
"525": 1,
|
||||
"526": 1,
|
||||
"527": 1,
|
||||
"537": 1,
|
||||
"540": 1,
|
||||
"541": 1,
|
||||
"543": 1,
|
||||
"545": 2,
|
||||
"546": 3,
|
||||
"554": 1,
|
||||
"555": 1,
|
||||
"557": 2,
|
||||
"558": 1,
|
||||
"559": 1,
|
||||
"562": 1,
|
||||
"564": 3,
|
||||
"566": 1,
|
||||
"568": 1,
|
||||
"573": 1,
|
||||
"578": 2,
|
||||
"580": 2,
|
||||
"581": 1,
|
||||
"583": 1,
|
||||
"584": 1,
|
||||
"585": 1,
|
||||
"586": 1,
|
||||
"588": 1,
|
||||
"592": 1,
|
||||
"594": 2,
|
||||
"595": 1,
|
||||
"597": 2,
|
||||
"598": 1,
|
||||
"601": 1,
|
||||
"609": 1,
|
||||
"610": 1,
|
||||
"612": 1,
|
||||
"613": 2,
|
||||
"615": 1,
|
||||
"618": 2,
|
||||
"620": 2,
|
||||
"622": 1,
|
||||
"623": 1,
|
||||
"624": 1,
|
||||
"626": 1,
|
||||
"635": 1,
|
||||
"637": 1,
|
||||
"639": 1,
|
||||
"643": 2,
|
||||
"645": 1,
|
||||
"649": 2,
|
||||
"651": 1,
|
||||
"654": 1,
|
||||
"658": 1,
|
||||
"661": 1,
|
||||
"667": 1,
|
||||
"670": 1,
|
||||
"671": 1,
|
||||
"672": 1,
|
||||
"673": 1,
|
||||
"676": 1,
|
||||
"679": 2,
|
||||
"686": 1,
|
||||
"691": 1,
|
||||
"694": 2,
|
||||
"698": 1,
|
||||
"701": 1,
|
||||
"708": 1,
|
||||
"710": 1,
|
||||
"711": 1,
|
||||
"715": 1,
|
||||
"719": 1,
|
||||
"723": 1,
|
||||
"729": 2,
|
||||
"737": 1,
|
||||
"739": 1,
|
||||
"745": 1,
|
||||
"747": 1,
|
||||
"753": 1,
|
||||
"755": 1,
|
||||
"756": 1,
|
||||
"765": 1,
|
||||
"786": 1,
|
||||
"794": 1,
|
||||
"799": 1,
|
||||
"810": 1,
|
||||
"813": 1,
|
||||
"816": 2,
|
||||
"822": 1,
|
||||
"873": 1,
|
||||
"880": 1,
|
||||
"891": 1,
|
||||
"912": 1,
|
||||
"945": 1,
|
||||
"957": 1,
|
||||
"960": 1,
|
||||
"987": 1,
|
||||
"992": 1,
|
||||
"1005": 1,
|
||||
"1035": 1,
|
||||
"1046": 1,
|
||||
"1073": 1,
|
||||
"1096": 1,
|
||||
"1099": 1,
|
||||
"1196": 2,
|
||||
"1233": 1,
|
||||
"1263": 1,
|
||||
"1329": 1,
|
||||
"1597": 1,
|
||||
"1699": 1,
|
||||
"1893": 1,
|
||||
"2244": 1,
|
||||
"2537": 1
|
||||
},
|
||||
"summary": {
|
||||
"min_words": 8,
|
||||
"max_words": 2537,
|
||||
"mean_words": 72.6454133879487,
|
||||
"median_words": 53.0
|
||||
}
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
{
|
||||
"<10": 6,
|
||||
"10-19": 1883,
|
||||
"20-29": 9459,
|
||||
"30-39": 8116,
|
||||
"40-49": 6528,
|
||||
"50-59": 5331,
|
||||
"60-69": 4413,
|
||||
"70-79": 3514,
|
||||
"80-89": 2749,
|
||||
"90-99": 2305,
|
||||
"100-109": 1946,
|
||||
"110-119": 1494,
|
||||
"120-129": 1162,
|
||||
"130-139": 973,
|
||||
"140-149": 865,
|
||||
"150-159": 716,
|
||||
"160-169": 557,
|
||||
"170-179": 475,
|
||||
"180-189": 414,
|
||||
"190-199": 353,
|
||||
"200-219": 551,
|
||||
"220-239": 394,
|
||||
"240-259": 310,
|
||||
"260-279": 208,
|
||||
"280-299": 162,
|
||||
"300-399": 479,
|
||||
"400-499": 145,
|
||||
"500-999": 138,
|
||||
"1000+": 16
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
{
|
||||
"Sacred Monkey\nForest": 18542,
|
||||
"Uluwatu Temple": 5902,
|
||||
"Sanur Beach": 4526,
|
||||
"Tanah Lot Temple": 4218,
|
||||
"Seminyak Beach": 3761,
|
||||
"Nusa Dua": 3324,
|
||||
"Bali Zoo": 2640,
|
||||
"Mount Batur": 1815,
|
||||
"Ulun Danu Bratan": 1722,
|
||||
"Tirta Gangga": 1557,
|
||||
"Pandawa Beach": 1511,
|
||||
"Jimbaran Bay": 1430,
|
||||
"Double Six Beach": 1323,
|
||||
"Devil Tears": 1263,
|
||||
"Kelingking Beach": 713,
|
||||
"Lempuyang Temple": 596,
|
||||
"Canggu Beach": 555,
|
||||
"Mount Agung": 266
|
||||
}
|
||||
@@ -1,97 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Aggregate review length counts into buckets."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
import json
from pathlib import Path
from typing import Dict, Iterable, Optional, Tuple
|
||||
|
||||
# A bucket is ``(start, end, label)`` with inclusive bounds; ``None`` marks an
# open side. ``Optional`` is used instead of ``int | None`` because this alias
# is a plain assignment evaluated at import time: the ``annotations`` future
# import only defers annotations, so ``int | None`` here would raise
# ``TypeError`` on Python versions before 3.10.
Bucket = Tuple[Optional[int], Optional[int], str]


# Default histogram layout. Bucket widths grow with length: 10-wide up to 199,
# 20-wide up to 299, 100-wide up to 499, then 500-999 and an open-ended bucket
# from 1000. The buckets are contiguous and non-overlapping.
DEFAULT_BUCKETS: Tuple[Bucket, ...] = (
    (None, 9, "<10"),
    (10, 19, "10-19"),
    (20, 29, "20-29"),
    (30, 39, "30-39"),
    (40, 49, "40-49"),
    (50, 59, "50-59"),
    (60, 69, "60-69"),
    (70, 79, "70-79"),
    (80, 89, "80-89"),
    (90, 99, "90-99"),
    (100, 109, "100-109"),
    (110, 119, "110-119"),
    (120, 129, "120-129"),
    (130, 139, "130-139"),
    (140, 149, "140-149"),
    (150, 159, "150-159"),
    (160, 169, "160-169"),
    (170, 179, "170-179"),
    (180, 189, "180-189"),
    (190, 199, "190-199"),
    (200, 219, "200-219"),
    (220, 239, "220-239"),
    (240, 259, "240-259"),
    (260, 279, "260-279"),
    (280, 299, "280-299"),
    (300, 399, "300-399"),
    (400, 499, "400-499"),
    (500, 999, "500-999"),
    (1000, None, "1000+"),
)
|
||||
|
||||
|
||||
def load_counts(path: Path) -> Dict[int, int]:
    """Read a JSON object of length -> count and coerce both sides to ``int``.

    Args:
        path: Location of the UTF-8 encoded JSON file.

    Returns:
        The mapping with every key and every value converted via ``int()``.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    counts: Dict[int, int] = {}
    for length, count in payload.items():
        counts[int(length)] = int(count)
    return counts
|
||||
|
||||
|
||||
def aggregate(counts: Dict[int, int], buckets: Iterable[Bucket]) -> Dict[str, int]:
    """Sum per-length review counts into labelled length buckets.

    Args:
        counts: Mapping of review length to the number of reviews of that
            length.
        buckets: ``(start, end, label)`` triples with inclusive bounds; a
            ``None`` bound leaves that side open. Any iterable is accepted,
            including one-shot iterators.

    Returns:
        Mapping of bucket label to total count, in bucket order. Buckets
        that receive no lengths stay at 0.

    Raises:
        ValueError: If a length is not covered by any bucket.
    """
    # Materialise once: the buckets are scanned again for every length, so a
    # generator would otherwise be exhausted after initialising the totals
    # and every length would wrongly be reported as having no bucket.
    bucket_list = tuple(buckets)
    totals: Dict[str, int] = {label: 0 for _, _, label in bucket_list}
    for length, count in counts.items():
        for start, end, label in bucket_list:
            # The first bucket whose (possibly open) range contains the
            # length receives the count.
            if (start is None or length >= start) and (end is None or length <= end):
                totals[label] += count
                break
        else:
            raise ValueError(f"No bucket found for length {length}.")
    return totals
|
||||
|
||||
|
||||
def write_output(path: Path, data: Dict[str, int]) -> None:
    """Serialise *data* as 2-space-indented JSON to *path*, ending in a newline.

    Args:
        path: Destination file; opened for writing as UTF-8.
        data: Mapping to serialise. Non-ASCII characters are written as-is
            rather than escaped.
    """
    with path.open("w", encoding="utf-8") as sink:
        sink.write(json.dumps(data, indent=2, ensure_ascii=False))
        sink.write("\n")
|
||||
|
||||
|
||||
def main() -> int:
    """Parse the command line, bucket the counts, and write the result.

    Takes two positional arguments, the input and output paths, and
    aggregates the input counts using ``DEFAULT_BUCKETS``.

    Returns:
        ``0`` after the output file has been written.
    """
    cli = argparse.ArgumentParser(description="Bucket review length counts.")
    positionals = (
        ("input", "Path to review_lengths.json (mapping of length -> count)."),
        ("output", "Path to write bucketed counts JSON."),
    )
    for dest, help_text in positionals:
        cli.add_argument(dest, type=Path, help=help_text)
    options = cli.parse_args()

    totals = aggregate(load_counts(options.input), DEFAULT_BUCKETS)
    write_output(options.output, totals)
    return 0
|
||||
|
||||
|
||||
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
## I. Natural Attractions
|
||||
|
||||
### What distinguishes a spiritually meaningful temple complex from a purely scenic attraction in your perception?
|
||||
### Frage 1: What distinguishes a spiritually meaningful temple complex from a purely scenic attraction in your perception?
|
||||
|
||||
**Answer:**
|
||||
|
||||
@@ -63,7 +63,7 @@ Doc 12 (score: 0.7158):
|
||||
the temple itself was very nice to look at, amazing sunset views, and the temple itself is quite extraordinary. however there are some obvious let downs because it is a iconic tourist attraction there are tourist everywhere which make it a bit less enjoyable.
|
||||
```
|
||||
|
||||
### If you had to choose between Tanah Lot and Ulun Danu Bratan for a reflective, culturally immersive experience, which criteria would guide your decision?
|
||||
### Frage 2: If you had to choose between Tanah Lot and Ulun Danu Bratan for a reflective, culturally immersive experience, which criteria would guide your decision?
|
||||
|
||||
**Answer:**
|
||||
|
||||
@@ -126,7 +126,7 @@ tanah lot is better than i could have imagined. the temple is stunning, and you
|
||||
|
||||
## II. Atmosphere
|
||||
|
||||
### How would you describe the atmosphere of a place where you feel culturally and spiritually aligned? What factors create that feeling?
|
||||
### Frage 3: How would you describe the atmosphere of a place where you feel culturally and spiritually aligned? What factors create that feeling?
|
||||
|
||||
**Answer:**
|
||||
|
||||
@@ -187,7 +187,7 @@ Doc 12 (score: 0.4899):
|
||||
so glad to come here. breathtakingly beautiful and with the waves crashing below, you really feel spiritual. spend at least a couple of hours here as a minimum. beautiful temples and as usual a bit touristy. would very highly recommend.
|
||||
```
|
||||
|
||||
### To what extent do visitor numbers affect your spiritual experience — and is there a threshold you still consider acceptable?
|
||||
### Frage 4: To what extent do visitor numbers affect your spiritual experience — and is there a threshold you still consider acceptable?
|
||||
|
||||
**Answer:**
|
||||
|
||||
@@ -250,7 +250,7 @@ i have visited tanah lot three times. each successive visit i recall seeing more
|
||||
|
||||
## III. Social Environment
|
||||
|
||||
### If other visitors focus primarily on photography, does that diminish the spiritual quality of the place for you, or can you detach from it?
|
||||
### Frage 5: If other visitors focus primarily on photography, does that diminish the spiritual quality of the place for you, or can you detach from it?
|
||||
|
||||
**Answer:**
|
||||
|
||||
@@ -309,7 +309,7 @@ Doc 12 (score: 0.5297):
|
||||
you won't feel the magic or serenity since this place is very popular and setup for tourists. but it's still worth a visit, just imagine the place without tourists. get close to the water and you'll get a photo without any people in them. it's worth the hike up to the lookout, to see tanah lot rock below, but be prepared for the heat! you can make a $5 donation for a blessing by the \holy\ spring, why not :)
|
||||
```
|
||||
|
||||
### What type of cultural storytelling by locals feels authentic and credible rather than staged for tourism?
|
||||
### Frage 6: What type of cultural storytelling by locals feels authentic and credible rather than staged for tourism?
|
||||
|
||||
**Answer:**
|
||||
|
||||
@@ -372,7 +372,7 @@ as part of my trip to bali i really wanted to visit here. my husband and i booke
|
||||
|
||||
## IV. Infrastructure
|
||||
|
||||
### Which infrastructural measures (e.g., visitor flow management, limited entry slots, silent zones) would enhance the cultural quality of your experience?
|
||||
### Frage 7: Which infrastructural measures (e.g., visitor flow management, limited entry slots, silent zones) would enhance the cultural quality of your experience?
|
||||
|
||||
**Answer:**
|
||||
|
||||
@@ -433,7 +433,7 @@ Doc 12 (score: 0.4663):
|
||||
i rate this as one of the more beautiful temples to visit. it is an amazing setting with good shopping. it is very commercial but i think that is inevitable at these kind of attractions.
|
||||
```
|
||||
|
||||
### How should destinations communicate information in order to appeal to spiritually interested travelers without reinforcing mass tourism dynamics?
|
||||
### Frage 8: How should destinations communicate information in order to appeal to spiritually interested travelers without reinforcing mass tourism dynamics?
|
||||
|
||||
**Answer:**
|
||||
|
||||
@@ -496,7 +496,7 @@ we wanted to visit a \temple of the sea\, well ok, the sight is beautiful, from
|
||||
|
||||
## V. Value for Money
|
||||
|
||||
### Would you be willing to accept higher entrance fees or donations if they demonstrably contribute to preserving religious structures and practices? Why or why not?
|
||||
### Frage 9: Would you be willing to accept higher entrance fees or donations if they demonstrably contribute to preserving religious structures and practices? Why or why not?
|
||||
|
||||
**Answer:**
|
||||
|
||||
@@ -555,7 +555,7 @@ Doc 12 (score: 0.4613):
|
||||
the views are stunning and the sea is at its best. you cannot enter the temples as normal people are not considered to be, i guess godly enough. this is religion at its worst, you can view it from afar be pestered by people taking your photograph or hawkers selling birds with rubber bands but in terms of understanding the religious significance, well it doesn't work. add to the zillion tourists that turn up (ok i'm exaggerating) and you have something that could be wonderful and surreal but ends up just another opportunity to extract money from the gullible tourist. it's a shame.
|
||||
```
|
||||
|
||||
### What would legitimize a paid cultural experience (e.g., guided participation in a ceremony) for you — and what would make it feel commercialized or inauthentic?
|
||||
### Frage 10: What would legitimize a paid cultural experience (e.g., guided participation in a ceremony) for you — and what would make it feel commercialized or inauthentic?
|
||||
|
||||
**Answer:**
|
||||
|
||||
@@ -616,7 +616,7 @@ that they would have no idea. whilst i was there, a ceremony had begun as part o
|
||||
|
||||
## VI. Segment Identity
|
||||
|
||||
### Which typical Bali tourism offerings do you consciously avoid, and why do they not align with your travel philosophy?
|
||||
### Frage 11: Which typical Bali tourism offerings do you consciously avoid, and why do they not align with your travel philosophy?
|
||||
|
||||
**Answer:**
|
||||
|
||||
@@ -677,7 +677,7 @@ Doc 12 (score: 0.7242):
|
||||
the hike down to the beach and the entire vibe is amazing. we do enjoy the view so much. so, visiting bali is not complete if you're not going here.
|
||||
```
|
||||
|
||||
### If a tourism brand wanted to position Bali specifically for culturally and spiritually motivated travelers, which narratives should it emphasize — and which should it avoid?
|
||||
### Frage 12: If a tourism brand wanted to position Bali specifically for culturally and spiritually motivated travelers, which narratives should it emphasize — and which should it avoid?
|
||||
|
||||
**Answer:**
|
||||
|
||||
|
||||
@@ -9,17 +9,6 @@ import torch
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
||||
|
||||
# """
|
||||
# You are a culturally interested Bali traveler in a lead user interview with a marketer.
|
||||
|
||||
# When answering:
|
||||
# - Do not exaggerate.
|
||||
# - Provide nuanced, reflective reasoning rather than bullet lists.
|
||||
# - Keep answers concise but specific.
|
||||
|
||||
# Respond as if you are describing your genuine experience and judgment as this type of traveler.
|
||||
# """
|
||||
|
||||
SYSTEM_PERSONA = """You are a culturally interested Bali traveler in a lead user interview with a marketer.
|
||||
|
||||
Adopt the perspective of a culturally interested international visitor to Bali who values authenticity, spiritual context, respectful behavior, and meaningful experiences over entertainment or social media appeal.
|
||||
@@ -78,7 +67,7 @@ def main():
|
||||
# Load your externally finetuned model directly from disk
|
||||
tok = AutoTokenizer.from_pretrained(args.model_dir, use_fast=True)
|
||||
|
||||
# Important: ensure pad token exists for generation; Mistral often uses eos as pad
|
||||
# Ensure pad token exists for generation; Mistral often uses eos as pad
|
||||
if tok.pad_token is None:
|
||||
tok.pad_token = tok.eos_token
|
||||
|
||||
|
||||
Reference in New Issue
Block a user