mirror of
https://github.com/marvinscham/masterthesis-playground.git
synced 2026-03-22 00:12:42 +01:00
Add helper stuff for figures, cleanup
This commit is contained in:
722
figures/bali_destinations_labeled.html
Normal file
722
figures/bali_destinations_labeled.html
Normal file
@@ -0,0 +1,722 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
|
||||
<script src="https://cdn.jsdelivr.net/npm/leaflet@1.9.3/dist/leaflet.js"></script>
|
||||
<script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.2.2/dist/js/bootstrap.bundle.min.js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.js"></script>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/leaflet@1.9.3/dist/leaflet.css"/>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.2/dist/css/bootstrap.min.css"/>
|
||||
<link rel="stylesheet" href="https://netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap-glyphicons.css"/>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.2.0/css/all.min.css"/>
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.css"/>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/python-visualization/folium/folium/templates/leaflet.awesome.rotate.min.css"/>
|
||||
|
||||
<meta name="viewport" content="width=device-width,
|
||||
initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
|
||||
<style>
|
||||
#map_8827cd9e27b957cf12c465a4efd53c8e {
|
||||
position: relative;
|
||||
width: 100.0%;
|
||||
height: 100.0%;
|
||||
left: 0.0%;
|
||||
top: 0.0%;
|
||||
}
|
||||
.leaflet-container { font-size: 1rem; }
|
||||
</style>
|
||||
|
||||
<style>html, body {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
</style>
|
||||
|
||||
<style>#map {
|
||||
position:absolute;
|
||||
top:0;
|
||||
bottom:0;
|
||||
right:0;
|
||||
left:0;
|
||||
}
|
||||
</style>
|
||||
|
||||
<script>
|
||||
L_NO_TOUCH = false;
|
||||
L_DISABLE_3D = false;
|
||||
</script>
|
||||
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
|
||||
<div class="folium-map" id="map_8827cd9e27b957cf12c465a4efd53c8e" ></div>
|
||||
|
||||
</body>
|
||||
<script>
|
||||
|
||||
|
||||
var map_8827cd9e27b957cf12c465a4efd53c8e = L.map(
|
||||
"map_8827cd9e27b957cf12c465a4efd53c8e",
|
||||
{
|
||||
center: [-8.45, 115.2],
|
||||
crs: L.CRS.EPSG3857,
|
||||
...{
|
||||
"zoom": 9,
|
||||
"zoomControl": true,
|
||||
"preferCanvas": false,
|
||||
"zoomSnap": 0.1,
|
||||
"zoomDelta": 0.1,
|
||||
}
|
||||
|
||||
}
|
||||
);
|
||||
L.control.scale().addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
var tile_layer_f4855f09fad51b54d44fb73a67dccf4e = L.tileLayer(
|
||||
"https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}{r}.png",
|
||||
{
|
||||
"minZoom": 0,
|
||||
"maxZoom": 18,
|
||||
"maxNativeZoom": 18,
|
||||
"noWrap": false,
|
||||
"attribution": "\u0026copy; \u003ca href=\"https://www.openstreetmap.org/copyright\"\u003eOpenStreetMap\u003c/a\u003e contributors \u0026copy; \u003ca href=\"https://carto.com/attributions\"\u003eCARTO\u003c/a\u003e",
|
||||
"subdomains": "abcd",
|
||||
"detectRetina": false,
|
||||
"tms": false,
|
||||
"opacity": 1,
|
||||
}
|
||||
|
||||
);
|
||||
|
||||
|
||||
tile_layer_f4855f09fad51b54d44fb73a67dccf4e.addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var circle_marker_5b4ae9dceb9c71755162320a031409f2 = L.circleMarker(
|
||||
[-8.5187511, 115.2585973],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_5b4ae9dceb9c71755162320a031409f2.bindTooltip(
|
||||
`<div>
|
||||
Sacred Monkey Forest
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_602eb000016a6b30ed7c72519753de07 = L.marker(
|
||||
[-8.5187511, 115.2585973],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_452f3f1faacc701744d7c02bacafef1b = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eSacred Monkey Forest\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_602eb000016a6b30ed7c72519753de07.setIcon(div_icon_452f3f1faacc701744d7c02bacafef1b);
|
||||
|
||||
|
||||
var circle_marker_2e56d660baf35eabcbfa98ff6e8d8d11 = L.circleMarker(
|
||||
[-8.8291432, 115.0849069],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_2e56d660baf35eabcbfa98ff6e8d8d11.bindTooltip(
|
||||
`<div>
|
||||
Uluwatu Temple
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_5dd8dbfb675ede190e11f0f7ca07c3bc = L.marker(
|
||||
[-8.8291432, 115.0849069],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_2648ca76c6782f2660a05bdde37e3616 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eUluwatu Temple\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_5dd8dbfb675ede190e11f0f7ca07c3bc.setIcon(div_icon_2648ca76c6782f2660a05bdde37e3616);
|
||||
|
||||
|
||||
var circle_marker_bb05fc2ce9b498a72f2d5403de4c057a = L.circleMarker(
|
||||
[-8.673889, 115.263611],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_bb05fc2ce9b498a72f2d5403de4c057a.bindTooltip(
|
||||
`<div>
|
||||
Sanur Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_ef590832f06fd20561b013b68756a271 = L.marker(
|
||||
[-8.673889, 115.263611],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_6c27875889040e5114bd58b6dd78d565 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eSanur Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_ef590832f06fd20561b013b68756a271.setIcon(div_icon_6c27875889040e5114bd58b6dd78d565);
|
||||
|
||||
|
||||
var circle_marker_238718621a21030747436a452bfb3299 = L.circleMarker(
|
||||
[-8.618786, 115.086733],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_238718621a21030747436a452bfb3299.bindTooltip(
|
||||
`<div>
|
||||
Tanah Lot Temple
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_ae5f715c478f42e3f143541f3234b0f9 = L.marker(
|
||||
[-8.618786, 115.086733],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_66d943b7af7c007ae0e4b8134ca4900f = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eTanah Lot Temple\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_ae5f715c478f42e3f143541f3234b0f9.setIcon(div_icon_66d943b7af7c007ae0e4b8134ca4900f);
|
||||
|
||||
|
||||
var circle_marker_8771a4fca9bbd4915b07cc2700c5e89e = L.circleMarker(
|
||||
[-8.6925, 115.158611],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_8771a4fca9bbd4915b07cc2700c5e89e.bindTooltip(
|
||||
`<div>
|
||||
Seminyak Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_6bb0332dd2f02d55130e014b19bffefe = L.marker(
|
||||
[-8.6925, 115.158611],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_9a4f199406a6917c3729d735293beec4 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eSeminyak Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_6bb0332dd2f02d55130e014b19bffefe.setIcon(div_icon_9a4f199406a6917c3729d735293beec4);
|
||||
|
||||
|
||||
var circle_marker_51e42098d14cee4d8bbba1e8de44cb1a = L.circleMarker(
|
||||
[-8.791918, 115.225375],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_51e42098d14cee4d8bbba1e8de44cb1a.bindTooltip(
|
||||
`<div>
|
||||
Nusa Dua
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_6db92ef3d1d15b93e2f8951453121e0e = L.marker(
|
||||
[-8.791918, 115.225375],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_3a87774e80c4c355e408bb97f02e9e04 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eNusa Dua\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, -8],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_6db92ef3d1d15b93e2f8951453121e0e.setIcon(div_icon_3a87774e80c4c355e408bb97f02e9e04);
|
||||
|
||||
|
||||
var circle_marker_d43c0263ab8f5111318f226a7ebd0a1a = L.circleMarker(
|
||||
[-8.59128, 115.26456],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_d43c0263ab8f5111318f226a7ebd0a1a.bindTooltip(
|
||||
`<div>
|
||||
Bali Zoo
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_045f45d15d9bb0bf3544ec15c15e72ca = L.marker(
|
||||
[-8.59128, 115.26456],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_17abbfa0aa47dc5e2b90a3f3ed4031a5 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eBali Zoo\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_045f45d15d9bb0bf3544ec15c15e72ca.setIcon(div_icon_17abbfa0aa47dc5e2b90a3f3ed4031a5);
|
||||
|
||||
|
||||
var circle_marker_a7d61c5f9e133c503602ce1a176641d0 = L.circleMarker(
|
||||
[-8.23889, 115.3775],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_a7d61c5f9e133c503602ce1a176641d0.bindTooltip(
|
||||
`<div>
|
||||
Mount Batur
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_4158f6f747343e4e3a34a6decc5862c6 = L.marker(
|
||||
[-8.23889, 115.3775],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_a68ad209a222c1c6d07276e7c80e8d1c = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eMount Batur\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_4158f6f747343e4e3a34a6decc5862c6.setIcon(div_icon_a68ad209a222c1c6d07276e7c80e8d1c);
|
||||
|
||||
|
||||
var circle_marker_aed36500c42e8fc9bf3376b0e1bb2ed9 = L.circleMarker(
|
||||
[-8.275177, 115.1668487],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_aed36500c42e8fc9bf3376b0e1bb2ed9.bindTooltip(
|
||||
`<div>
|
||||
Ulun Danu Bratan
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_22a12c5d4517fbdbba1d7e4b93716e8b = L.marker(
|
||||
[-8.275177, 115.1668487],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_6ba395bc4ffc650104f4c3b4b96fa477 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eUlun Danu Bratan\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_22a12c5d4517fbdbba1d7e4b93716e8b.setIcon(div_icon_6ba395bc4ffc650104f4c3b4b96fa477);
|
||||
|
||||
|
||||
var circle_marker_9e78cc21d0b245c95b3a65818241d6b1 = L.circleMarker(
|
||||
[-8.411944, 115.5875],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_9e78cc21d0b245c95b3a65818241d6b1.bindTooltip(
|
||||
`<div>
|
||||
Tirta Gangga
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_acadfa63b305a6930490ce129db70d3c = L.marker(
|
||||
[-8.411944, 115.5875],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_6701732f8753d0cf3dd086583f966d47 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eTirta Gangga\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_acadfa63b305a6930490ce129db70d3c.setIcon(div_icon_6701732f8753d0cf3dd086583f966d47);
|
||||
|
||||
|
||||
var circle_marker_2bfd51976f3bff708534a582e4c0bf07 = L.circleMarker(
|
||||
[-8.84586, 115.18417],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_2bfd51976f3bff708534a582e4c0bf07.bindTooltip(
|
||||
`<div>
|
||||
Pandawa Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_ed85f748464576595c1995b90bd453ef = L.marker(
|
||||
[-8.84586, 115.18417],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_760441791416950ed05bce0760e785b3 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003ePandawa Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_ed85f748464576595c1995b90bd453ef.setIcon(div_icon_760441791416950ed05bce0760e785b3);
|
||||
|
||||
|
||||
var circle_marker_7905afef37932aa1ee010c0afc07b0e1 = L.circleMarker(
|
||||
[-8.79093, 115.16006],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_7905afef37932aa1ee010c0afc07b0e1.bindTooltip(
|
||||
`<div>
|
||||
Jimbaran Bay
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_b2515a0a726a9b31bb1349a731e14e83 = L.marker(
|
||||
[-8.79093, 115.16006],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_66b39b3aaa2ce168a11eb1e5842c4af5 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eJimbaran Bay\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_b2515a0a726a9b31bb1349a731e14e83.setIcon(div_icon_66b39b3aaa2ce168a11eb1e5842c4af5);
|
||||
|
||||
|
||||
var circle_marker_2e4a4da4c607525d0bd3ced67f91ba28 = L.circleMarker(
|
||||
[-8.6975074, 115.1610332],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_2e4a4da4c607525d0bd3ced67f91ba28.bindTooltip(
|
||||
`<div>
|
||||
Double Six Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_610df1ccee05f9940b5331a8c95b1ecb = L.marker(
|
||||
[-8.6975074, 115.1610332],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_ca934069aed3a67e09cd3417a4f13721 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eDouble Six Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, -8],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_610df1ccee05f9940b5331a8c95b1ecb.setIcon(div_icon_ca934069aed3a67e09cd3417a4f13721);
|
||||
|
||||
|
||||
var circle_marker_6df0392885bf12f353c499f20e4408e4 = L.circleMarker(
|
||||
[-8.690565, 115.4302884],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_6df0392885bf12f353c499f20e4408e4.bindTooltip(
|
||||
`<div>
|
||||
Devil Tears
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_cadbe0b40f9ed26e08e22f0c239a31ee = L.marker(
|
||||
[-8.690565, 115.4302884],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_7b530133b508d4cc268be38f800e05a6 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eDevil Tears\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_cadbe0b40f9ed26e08e22f0c239a31ee.setIcon(div_icon_7b530133b508d4cc268be38f800e05a6);
|
||||
|
||||
|
||||
var circle_marker_fa698e9847acafbbf4b5516fc8471f66 = L.circleMarker(
|
||||
[-8.750644, 115.474693],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_fa698e9847acafbbf4b5516fc8471f66.bindTooltip(
|
||||
`<div>
|
||||
Kelingking Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_2313407fb3b0e9bf2b11e9e793e558bf = L.marker(
|
||||
[-8.750644, 115.474693],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_cbcfa736ca9fc77147f3a561fff80c16 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eKelingking Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_2313407fb3b0e9bf2b11e9e793e558bf.setIcon(div_icon_cbcfa736ca9fc77147f3a561fff80c16);
|
||||
|
||||
|
||||
var circle_marker_47bc40126cf9256b5447c4e1983393ce = L.circleMarker(
|
||||
[-8.395195, 115.647885],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_47bc40126cf9256b5447c4e1983393ce.bindTooltip(
|
||||
`<div>
|
||||
Lempuyang Temple
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_7bb290b54979c3fed12bbe3ab8dd7b69 = L.marker(
|
||||
[-8.395195, 115.647885],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_5a34c539b7720057973544f25ff2c779 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eLempuyang Temple\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_7bb290b54979c3fed12bbe3ab8dd7b69.setIcon(div_icon_5a34c539b7720057973544f25ff2c779);
|
||||
|
||||
|
||||
var circle_marker_1a8ec5245976c9d8de699ed61d02ba8f = L.circleMarker(
|
||||
[-8.639877, 115.140172],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_1a8ec5245976c9d8de699ed61d02ba8f.bindTooltip(
|
||||
`<div>
|
||||
Canggu Beach
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_f439782dac43c98e72b2ee679dcd6acf = L.marker(
|
||||
[-8.639877, 115.140172],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_9c3d9bf434778a4e3b4c9e756f6f8a22 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eCanggu Beach\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_f439782dac43c98e72b2ee679dcd6acf.setIcon(div_icon_9c3d9bf434778a4e3b4c9e756f6f8a22);
|
||||
|
||||
|
||||
var circle_marker_ec714608b52782227236e4b16fc3de53 = L.circleMarker(
|
||||
[-8.340686, 115.503622],
|
||||
{"bubblingMouseEvents": true, "color": "#3388ff", "dashArray": null, "dashOffset": null, "fill": true, "fillColor": "#3388ff", "fillOpacity": 1.0, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "opacity": 1.0, "radius": 4, "stroke": true, "weight": 2}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
circle_marker_ec714608b52782227236e4b16fc3de53.bindTooltip(
|
||||
`<div>
|
||||
Mount Agung
|
||||
</div>`,
|
||||
{
|
||||
"sticky": true,
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
var marker_0a2b278b113476c9568e4a0cb1815202 = L.marker(
|
||||
[-8.340686, 115.503622],
|
||||
{
|
||||
}
|
||||
).addTo(map_8827cd9e27b957cf12c465a4efd53c8e);
|
||||
|
||||
|
||||
var div_icon_c36cada9c49b18e2afaed6243a4426f1 = L.divIcon({
|
||||
"html": "\u003cdiv style=\"\npadding: 3px 6px;\nfont-size: 16px;\nfont-weight: 600;\ncolor: #111;\nwhite-space: nowrap;\n\"\u003eMount Agung\u003c/div\u003e",
|
||||
"iconSize": [1, 1],
|
||||
"iconAnchor": [-8, 12],
|
||||
"className": "empty",
|
||||
});
|
||||
|
||||
|
||||
marker_0a2b278b113476c9568e4a0cb1815202.setIcon(div_icon_c36cada9c49b18e2afaed6243a4426f1);
|
||||
|
||||
|
||||
map_8827cd9e27b957cf12c465a4efd53c8e.fitBounds(
|
||||
[[-8.85086, 115.0799069], [-8.233889999999999, 115.652885]],
|
||||
{}
|
||||
);
|
||||
|
||||
</script>
|
||||
</html>
|
||||
116
figures/bali_map.py
Normal file
116
figures/bali_map.py
Normal file
@@ -0,0 +1,116 @@
|
||||
# bali_map.py
|
||||
# Creates an interactive HTML map of Bali (and nearby islands) with readable, always-visible labels.
|
||||
|
||||
import folium
|
||||
|
||||
# Destination name -> (latitude, longitude), as passed to folium's
# ``location=`` argument. Insertion order is the order in which the markers
# are added to the map.
DESTINATIONS = {
    "Sacred Monkey Forest": (-8.5187511, 115.2585973),
    "Uluwatu Temple": (-8.8291432, 115.0849069),
    "Sanur Beach": (-8.673889, 115.263611),
    "Tanah Lot Temple": (-8.618786, 115.086733),
    "Seminyak Beach": (-8.6925, 115.158611),
    "Nusa Dua": (-8.791918, 115.225375),
    "Bali Zoo": (-8.59128, 115.26456),
    "Mount Batur": (-8.23889, 115.3775),
    "Ulun Danu Bratan": (-8.275177, 115.1668487),
    "Tirta Gangga": (-8.411944, 115.5875),
    "Pandawa Beach": (-8.84586, 115.18417),
    "Jimbaran Bay": (-8.79093, 115.16006),
    "Double Six Beach": (-8.6975074, 115.1610332),
    "Devil Tears": (-8.690565, 115.4302884),
    "Kelingking Beach": (-8.750644, 115.474693),
    "Lempuyang Temple": (-8.395195, 115.647885),
    "Canggu Beach": (-8.639877, 115.140172),
    "Mount Agung": (-8.340686, 115.503622),
}
|
||||
|
||||
# --- Map base ---
|
||||
# Module-level map object; add_point_with_label() and the script tail both
# add to / save this instance.
m = folium.Map(
    # Initial centre as (lat, lon). The script later calls m.fit_bounds(),
    # which re-frames the view around the destinations.
    location=(-8.45, 115.20),
    zoom_start=9,
    tiles="CartoDB positron",
    control_scale=True,  # emitted as L.control.scale() in the generated page
    # Passed through to Leaflet as zoomSnap / zoomDelta; presumably chosen so
    # the fitted view can settle on a fractional zoom level -- confirm.
    zoom_snap=0.1,
    zoom_delta=0.1,
    max_zoom=18,
)
|
||||
|
||||
# --- Label styling (readable, always visible) ---
|
||||
# Inline CSS for every label <div>. The string (including its leading and
# trailing newline) is interpolated verbatim into a style="..." attribute by
# add_point_with_label().
LABEL_STYLE = """
padding: 3px 6px;
font-size: 16px;
font-weight: 600;
color: #111;
white-space: nowrap;
"""

# Per-label pixel offsets (x, y). Positive y moves the label down.
# Destinations without an entry fall back to (0, 0) in add_point_with_label().
LABEL_OFFSETS = {
    "Nusa Dua": (0, 20),
    "Double Six Beach": (0, 20),
}
|
||||
|
||||
|
||||
def add_point_with_label(name: str, lat: float, lon: float):
    """Draw one destination on the module-level map ``m``.

    Two layers are added at ``(lat, lon)``, in this order:

    1. a small filled circle marking the exact coordinate, with ``name`` as
       its hover tooltip;
    2. a marker whose icon is a text-only ``DivIcon`` showing ``name``
       permanently next to the dot.

    ``LABEL_OFFSETS`` may hold a per-name ``(x, y)`` pixel nudge; names
    without an entry use ``(0, 0)``. The nudge is folded into the icon
    anchor: x is added to the base anchor and y is subtracted from it, so a
    positive y moves the label down.

    Args:
        name: Label text; also used as the tooltip and the offset lookup key.
        lat: Latitude of the point.
        lon: Longitude of the point.
    """
    position = (lat, lon)

    # The dot goes on the map before its label.
    dot = folium.CircleMarker(
        location=position,
        radius=4,
        weight=2,
        fill=True,
        fill_opacity=1.0,
        tooltip=name,  # still useful on hover
    )
    dot.add_to(m)

    # Base anchor (-8, 12): under Leaflet's iconAnchor convention this puts
    # the label's top-left corner 8 px to the right of and 12 px above the
    # point, so the text does not sit directly on the dot.
    nudge_x, nudge_y = LABEL_OFFSETS.get(name, (0, 0))
    anchor = (-8 + nudge_x, 12 - nudge_y)

    label_icon = folium.DivIcon(
        icon_size=(1, 1),
        icon_anchor=anchor,
        html=f'<div style="{LABEL_STYLE}">{name}</div>',
    )
    folium.Marker(location=position, icon=label_icon).add_to(m)
|
||||
|
||||
|
||||
# Add all destinations
|
||||
# Place every destination on the map, in DESTINATIONS order.
for name, (lat, lon) in DESTINATIONS.items():
    add_point_with_label(name, lat, lon)

# Collect the coordinates per axis to derive the bounding box.
lats = [coords[0] for coords in DESTINATIONS.values()]
lons = [coords[1] for coords in DESTINATIONS.values()]

# Fit map bounds to include Nusa Penida / Lembongan as well.
# `pad` widens the box slightly on every side (same units as the coordinates).
pad = 0.005
south_west = [min(lats) - pad, min(lons) - pad]
north_east = [max(lats) + pad, max(lons) + pad]
m.fit_bounds([south_west, north_east])

# Output: written relative to the current working directory.
out_file = "bali_destinations_labeled.html"
m.save(out_file)
print(f"Saved: {out_file}")
|
||||
114
figures/bargraph.py
Normal file
114
figures/bargraph.py
Normal file
@@ -0,0 +1,114 @@
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
def load_json_data(file_path):
    """Load and validate a flat ``{label: number}`` mapping from a JSON file.

    Expected format:
        {
            "label1": value1,
            "label2": value2,
            ...
        }

    Args:
        file_path: Path to a UTF-8 encoded JSON file.

    Returns:
        The parsed ``dict`` mapping each label to an ``int`` or ``float``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the top-level JSON value is not an object, or if any
            value is not a number. JSON booleans are rejected. Malformed
            JSON surfaces as ``json.JSONDecodeError``, itself a
            ``ValueError`` subclass.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, dict):
        raise ValueError(
            "JSON must be an object with key-value pairs (labels: values)."
        )

    # Keys need no check: json.load always produces str keys for objects.
    for key, value in data.items():
        # bool is a subclass of int, so JSON true/false would otherwise slip
        # through the numeric check and be plotted as 1/0.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise ValueError(
                "All values must be numeric (int or float); "
                f"got {type(value).__name__} for label {key!r}."
            )

    return data
|
||||
|
||||
|
||||
def create_bar_graph(
    data, title="Bar Graph", x_label="Labels", y_label="Values", output=None
):
    """
    Create a bar graph from a dictionary of data.

    Args:
        data: Mapping of bar label -> bar height; bars are drawn in the
            mapping's iteration order.
        title: Title placed above the plot.
        x_label: Caption of the x-axis.
        y_label: Caption of the y-axis.
        output: File path to save the figure to. When falsy, the figure is
            shown interactively instead.
    """
    labels = list(data.keys())
    values = list(data.values())

    # Keep a handle on the figure so it can be released after saving.
    fig = plt.figure(figsize=(10, 6))
    plt.bar(labels, values)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.xticks(rotation=45)
    plt.tight_layout()

    if output:
        try:
            fig.savefig(output)
        finally:
            # pyplot keeps every figure alive until it is closed explicitly;
            # without this, repeated calls accumulate open figures.
            plt.close(fig)
        print(f"Graph saved to: {output}")
    else:
        plt.show()
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point: parse the arguments, load the JSON file and
    render the bar graph.

    Any exception raised while loading or plotting is reported on stderr and
    the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Generate a bar graph from a JSON file containing key-value pairs."
    )
    parser.add_argument(
        "json_path",
        type=str,
        help="Path to the JSON file (e.g., data.json)",
    )

    # All remaining options are plain strings that differ only in flag name,
    # default and help text, so they are registered from one table.
    string_options = (
        ("--title", "Bar Graph", "Title of the bar graph"),
        ("--x_label", "Labels", "Label for the x-axis"),
        ("--y_label", "Values", "Label for the y-axis"),
        (
            "--output",
            None,
            "Optional output file path (e.g., graph.png). If not provided, the graph will be displayed.",
        ),
    )
    for flag, default, help_text in string_options:
        parser.add_argument(flag, type=str, default=default, help=help_text)

    args = parser.parse_args()

    try:
        graph_data = load_json_data(args.json_path)
        create_bar_graph(
            graph_data,
            title=args.title,
            x_label=args.x_label,
            y_label=args.y_label,
            output=args.output,
        )
    except Exception as exc:
        # Top-level boundary of the CLI: turn any failure into a short message.
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
3
figures/requirements.txt
Normal file
3
figures/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
matplotlib
|
||||
folium
|
||||
pandas
|
||||
101
figures/review_dist.py
Normal file
101
figures/review_dist.py
Normal file
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Read a .tab (TSV) file with a single column named 'review'.
|
||||
1) Print number of rows
|
||||
2) Drop exact duplicate reviews and print count again
|
||||
3) Build JSON describing the distribution of review length (in words) for remaining reviews
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def word_count(text: str) -> int:
    """Return the number of whitespace-separated words in *text*.

    Anything that is not a ``str`` (for example NaN or ``None``) counts as
    0 words, as does an empty or whitespace-only string.
    """
    # str.split() without arguments ignores leading/trailing whitespace and
    # yields an empty list for blank input, so no explicit strip is needed.
    return len(text.split()) if isinstance(text, str) else 0
|
||||
|
||||
|
||||
def main() -> int:
    """Run the CLI: count rows, drop duplicate reviews, write the length stats.

    Returns:
        0 on success; 1 if the input file does not exist or has no 'review'
        column (an error message is printed to stderr in both cases).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "input_tab", help="Path to .tab/.tsv file with a 'review' column"
    )
    cli.add_argument(
        "--out",
        default="review_length_distribution.json",
        help="Output JSON path (default: review_length_distribution.json)",
    )
    args = cli.parse_args()

    in_path = Path(args.input_tab)
    if not in_path.exists():
        print(f"ERROR: file not found: {in_path}", file=sys.stderr)
        return 1

    # Tab-separated input, every column read as text. keep_default_na=False
    # keeps empty fields as "" instead of letting pandas turn them into NaN.
    df = pd.read_csv(in_path, sep="\t", dtype=str, keep_default_na=False)
    if "review" not in df.columns:
        print(
            f"ERROR: expected a column named 'review'. Found: {list(df.columns)}",
            file=sys.stderr,
        )
        return 1

    n_before = len(df)
    print(f"Rows before dedup: {n_before}")

    # Only byte-for-byte identical reviews are treated as duplicates; strip
    # the column beforehand if surrounding whitespace should be ignored.
    unique_reviews = df.drop_duplicates(subset=["review"], keep="first")
    unique_reviews = unique_reviews.reset_index(drop=True)
    n_after = len(unique_reviews)
    print(f"Rows after dedup: {n_after}")

    # Word count per remaining review, then histogram: word count -> reviews.
    lengths = unique_reviews["review"].map(word_count)
    histogram = Counter(lengths.tolist())

    if len(lengths):
        summary = {
            "min_words": int(lengths.min()),
            "max_words": int(lengths.max()),
            "mean_words": float(lengths.mean()),
            "median_words": float(lengths.median()),
        }
    else:
        # No rows at all: report zeros instead of NaN statistics.
        summary = {
            "min_words": 0,
            "max_words": 0,
            "mean_words": 0.0,
            "median_words": 0.0,
        }

    result = {
        "file": str(in_path),
        "rows_before_dedup": n_before,
        "rows_after_dedup": n_after,
        # JSON object keys must be strings; entries are ordered by numeric length.
        "distribution_word_length": {
            str(n_words): histogram[n_words]
            for n_words in sorted(histogram, key=int)
        },
        "summary": summary,
    }

    out_path = Path(args.out)
    serialized = json.dumps(result, ensure_ascii=False, indent=2)
    out_path.write_text(serialized, encoding="utf-8")
    print(f"Wrote JSON: {out_path}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
604
figures/review_length_info.json
Normal file
604
figures/review_length_info.json
Normal file
@@ -0,0 +1,604 @@
|
||||
{
|
||||
"file": "../data/original/reviews.tab",
|
||||
"rows_before_dedup": 56446,
|
||||
"rows_after_dedup": 55662,
|
||||
"distribution_word_length": {
|
||||
"8": 1,
|
||||
"9": 5,
|
||||
"10": 10,
|
||||
"11": 14,
|
||||
"12": 20,
|
||||
"13": 29,
|
||||
"14": 37,
|
||||
"15": 92,
|
||||
"16": 163,
|
||||
"17": 308,
|
||||
"18": 482,
|
||||
"19": 728,
|
||||
"20": 859,
|
||||
"21": 977,
|
||||
"22": 944,
|
||||
"23": 989,
|
||||
"24": 937,
|
||||
"25": 1032,
|
||||
"26": 946,
|
||||
"27": 927,
|
||||
"28": 928,
|
||||
"29": 920,
|
||||
"30": 926,
|
||||
"31": 879,
|
||||
"32": 897,
|
||||
"33": 856,
|
||||
"34": 759,
|
||||
"35": 829,
|
||||
"36": 774,
|
||||
"37": 708,
|
||||
"38": 771,
|
||||
"39": 717,
|
||||
"40": 693,
|
||||
"41": 737,
|
||||
"42": 734,
|
||||
"43": 655,
|
||||
"44": 616,
|
||||
"45": 630,
|
||||
"46": 680,
|
||||
"47": 609,
|
||||
"48": 588,
|
||||
"49": 586,
|
||||
"50": 598,
|
||||
"51": 562,
|
||||
"52": 543,
|
||||
"53": 563,
|
||||
"54": 549,
|
||||
"55": 551,
|
||||
"56": 478,
|
||||
"57": 522,
|
||||
"58": 450,
|
||||
"59": 515,
|
||||
"60": 509,
|
||||
"61": 461,
|
||||
"62": 453,
|
||||
"63": 451,
|
||||
"64": 483,
|
||||
"65": 403,
|
||||
"66": 442,
|
||||
"67": 404,
|
||||
"68": 418,
|
||||
"69": 389,
|
||||
"70": 394,
|
||||
"71": 355,
|
||||
"72": 357,
|
||||
"73": 389,
|
||||
"74": 360,
|
||||
"75": 356,
|
||||
"76": 338,
|
||||
"77": 330,
|
||||
"78": 308,
|
||||
"79": 327,
|
||||
"80": 303,
|
||||
"81": 302,
|
||||
"82": 306,
|
||||
"83": 273,
|
||||
"84": 276,
|
||||
"85": 265,
|
||||
"86": 268,
|
||||
"87": 263,
|
||||
"88": 264,
|
||||
"89": 229,
|
||||
"90": 244,
|
||||
"91": 239,
|
||||
"92": 212,
|
||||
"93": 267,
|
||||
"94": 211,
|
||||
"95": 226,
|
||||
"96": 247,
|
||||
"97": 219,
|
||||
"98": 239,
|
||||
"99": 201,
|
||||
"100": 220,
|
||||
"101": 213,
|
||||
"102": 180,
|
||||
"103": 194,
|
||||
"104": 204,
|
||||
"105": 201,
|
||||
"106": 200,
|
||||
"107": 149,
|
||||
"108": 189,
|
||||
"109": 196,
|
||||
"110": 178,
|
||||
"111": 140,
|
||||
"112": 157,
|
||||
"113": 150,
|
||||
"114": 160,
|
||||
"115": 130,
|
||||
"116": 151,
|
||||
"117": 159,
|
||||
"118": 151,
|
||||
"119": 118,
|
||||
"120": 138,
|
||||
"121": 115,
|
||||
"122": 107,
|
||||
"123": 121,
|
||||
"124": 99,
|
||||
"125": 135,
|
||||
"126": 126,
|
||||
"127": 125,
|
||||
"128": 97,
|
||||
"129": 99,
|
||||
"130": 95,
|
||||
"131": 92,
|
||||
"132": 86,
|
||||
"133": 108,
|
||||
"134": 115,
|
||||
"135": 101,
|
||||
"136": 101,
|
||||
"137": 103,
|
||||
"138": 91,
|
||||
"139": 81,
|
||||
"140": 92,
|
||||
"141": 91,
|
||||
"142": 95,
|
||||
"143": 76,
|
||||
"144": 84,
|
||||
"145": 91,
|
||||
"146": 84,
|
||||
"147": 87,
|
||||
"148": 92,
|
||||
"149": 73,
|
||||
"150": 78,
|
||||
"151": 71,
|
||||
"152": 76,
|
||||
"153": 87,
|
||||
"154": 60,
|
||||
"155": 67,
|
||||
"156": 67,
|
||||
"157": 88,
|
||||
"158": 56,
|
||||
"159": 66,
|
||||
"160": 41,
|
||||
"161": 56,
|
||||
"162": 61,
|
||||
"163": 68,
|
||||
"164": 62,
|
||||
"165": 67,
|
||||
"166": 52,
|
||||
"167": 62,
|
||||
"168": 47,
|
||||
"169": 41,
|
||||
"170": 49,
|
||||
"171": 47,
|
||||
"172": 43,
|
||||
"173": 39,
|
||||
"174": 61,
|
||||
"175": 56,
|
||||
"176": 55,
|
||||
"177": 47,
|
||||
"178": 34,
|
||||
"179": 44,
|
||||
"180": 43,
|
||||
"181": 37,
|
||||
"182": 48,
|
||||
"183": 47,
|
||||
"184": 39,
|
||||
"185": 38,
|
||||
"186": 42,
|
||||
"187": 42,
|
||||
"188": 35,
|
||||
"189": 43,
|
||||
"190": 39,
|
||||
"191": 38,
|
||||
"192": 37,
|
||||
"193": 27,
|
||||
"194": 28,
|
||||
"195": 40,
|
||||
"196": 33,
|
||||
"197": 36,
|
||||
"198": 40,
|
||||
"199": 35,
|
||||
"200": 30,
|
||||
"201": 28,
|
||||
"202": 28,
|
||||
"203": 26,
|
||||
"204": 28,
|
||||
"205": 32,
|
||||
"206": 31,
|
||||
"207": 36,
|
||||
"208": 36,
|
||||
"209": 24,
|
||||
"210": 20,
|
||||
"211": 34,
|
||||
"212": 26,
|
||||
"213": 31,
|
||||
"214": 27,
|
||||
"215": 25,
|
||||
"216": 23,
|
||||
"217": 26,
|
||||
"218": 20,
|
||||
"219": 20,
|
||||
"220": 20,
|
||||
"221": 28,
|
||||
"222": 15,
|
||||
"223": 18,
|
||||
"224": 17,
|
||||
"225": 22,
|
||||
"226": 16,
|
||||
"227": 29,
|
||||
"228": 27,
|
||||
"229": 23,
|
||||
"230": 14,
|
||||
"231": 23,
|
||||
"232": 22,
|
||||
"233": 21,
|
||||
"234": 23,
|
||||
"235": 16,
|
||||
"236": 18,
|
||||
"237": 14,
|
||||
"238": 11,
|
||||
"239": 17,
|
||||
"240": 8,
|
||||
"241": 16,
|
||||
"242": 12,
|
||||
"243": 18,
|
||||
"244": 15,
|
||||
"245": 11,
|
||||
"246": 24,
|
||||
"247": 14,
|
||||
"248": 18,
|
||||
"249": 15,
|
||||
"250": 11,
|
||||
"251": 17,
|
||||
"252": 17,
|
||||
"253": 15,
|
||||
"254": 17,
|
||||
"255": 18,
|
||||
"256": 14,
|
||||
"257": 21,
|
||||
"258": 13,
|
||||
"259": 16,
|
||||
"260": 10,
|
||||
"261": 20,
|
||||
"262": 8,
|
||||
"263": 9,
|
||||
"264": 11,
|
||||
"265": 16,
|
||||
"266": 6,
|
||||
"267": 14,
|
||||
"268": 14,
|
||||
"269": 12,
|
||||
"270": 11,
|
||||
"271": 12,
|
||||
"272": 9,
|
||||
"273": 5,
|
||||
"274": 7,
|
||||
"275": 4,
|
||||
"276": 6,
|
||||
"277": 10,
|
||||
"278": 11,
|
||||
"279": 13,
|
||||
"280": 7,
|
||||
"281": 9,
|
||||
"282": 6,
|
||||
"283": 9,
|
||||
"284": 10,
|
||||
"285": 9,
|
||||
"286": 11,
|
||||
"287": 8,
|
||||
"288": 5,
|
||||
"289": 6,
|
||||
"290": 8,
|
||||
"291": 4,
|
||||
"292": 11,
|
||||
"293": 6,
|
||||
"294": 11,
|
||||
"295": 11,
|
||||
"296": 7,
|
||||
"297": 4,
|
||||
"298": 7,
|
||||
"299": 13,
|
||||
"300": 7,
|
||||
"301": 15,
|
||||
"302": 10,
|
||||
"303": 7,
|
||||
"304": 11,
|
||||
"305": 3,
|
||||
"306": 7,
|
||||
"307": 8,
|
||||
"308": 6,
|
||||
"309": 4,
|
||||
"310": 7,
|
||||
"311": 4,
|
||||
"312": 8,
|
||||
"313": 5,
|
||||
"314": 1,
|
||||
"315": 8,
|
||||
"316": 8,
|
||||
"317": 9,
|
||||
"318": 8,
|
||||
"319": 6,
|
||||
"320": 8,
|
||||
"321": 2,
|
||||
"322": 8,
|
||||
"323": 6,
|
||||
"324": 9,
|
||||
"325": 6,
|
||||
"326": 8,
|
||||
"327": 3,
|
||||
"328": 8,
|
||||
"329": 7,
|
||||
"330": 5,
|
||||
"331": 8,
|
||||
"332": 7,
|
||||
"333": 2,
|
||||
"334": 1,
|
||||
"335": 9,
|
||||
"336": 4,
|
||||
"337": 6,
|
||||
"338": 4,
|
||||
"339": 3,
|
||||
"340": 6,
|
||||
"341": 5,
|
||||
"342": 3,
|
||||
"343": 4,
|
||||
"344": 3,
|
||||
"345": 5,
|
||||
"346": 3,
|
||||
"347": 5,
|
||||
"348": 3,
|
||||
"349": 3,
|
||||
"350": 3,
|
||||
"351": 2,
|
||||
"352": 8,
|
||||
"353": 4,
|
||||
"354": 4,
|
||||
"355": 4,
|
||||
"356": 3,
|
||||
"357": 4,
|
||||
"358": 3,
|
||||
"359": 3,
|
||||
"360": 8,
|
||||
"361": 6,
|
||||
"362": 5,
|
||||
"363": 8,
|
||||
"364": 4,
|
||||
"365": 6,
|
||||
"366": 3,
|
||||
"367": 7,
|
||||
"368": 4,
|
||||
"369": 8,
|
||||
"370": 2,
|
||||
"371": 2,
|
||||
"372": 7,
|
||||
"373": 5,
|
||||
"374": 4,
|
||||
"375": 1,
|
||||
"376": 1,
|
||||
"377": 3,
|
||||
"378": 1,
|
||||
"379": 2,
|
||||
"380": 2,
|
||||
"381": 2,
|
||||
"382": 3,
|
||||
"383": 2,
|
||||
"384": 1,
|
||||
"385": 1,
|
||||
"386": 2,
|
||||
"387": 4,
|
||||
"388": 6,
|
||||
"389": 4,
|
||||
"390": 4,
|
||||
"391": 3,
|
||||
"392": 3,
|
||||
"393": 2,
|
||||
"394": 2,
|
||||
"395": 7,
|
||||
"396": 6,
|
||||
"397": 2,
|
||||
"398": 2,
|
||||
"401": 1,
|
||||
"402": 5,
|
||||
"403": 1,
|
||||
"404": 3,
|
||||
"405": 4,
|
||||
"406": 1,
|
||||
"407": 1,
|
||||
"409": 3,
|
||||
"410": 2,
|
||||
"411": 1,
|
||||
"412": 1,
|
||||
"413": 2,
|
||||
"414": 3,
|
||||
"415": 4,
|
||||
"416": 2,
|
||||
"417": 2,
|
||||
"418": 3,
|
||||
"419": 1,
|
||||
"420": 2,
|
||||
"421": 4,
|
||||
"422": 1,
|
||||
"424": 3,
|
||||
"425": 4,
|
||||
"426": 4,
|
||||
"427": 1,
|
||||
"428": 1,
|
||||
"429": 2,
|
||||
"430": 2,
|
||||
"431": 4,
|
||||
"433": 1,
|
||||
"434": 1,
|
||||
"436": 1,
|
||||
"437": 1,
|
||||
"438": 5,
|
||||
"439": 1,
|
||||
"440": 2,
|
||||
"441": 1,
|
||||
"443": 4,
|
||||
"444": 3,
|
||||
"445": 1,
|
||||
"446": 5,
|
||||
"448": 1,
|
||||
"449": 4,
|
||||
"451": 2,
|
||||
"452": 1,
|
||||
"455": 3,
|
||||
"456": 1,
|
||||
"457": 1,
|
||||
"458": 1,
|
||||
"459": 1,
|
||||
"463": 2,
|
||||
"464": 1,
|
||||
"465": 2,
|
||||
"466": 2,
|
||||
"467": 2,
|
||||
"469": 1,
|
||||
"470": 1,
|
||||
"474": 1,
|
||||
"475": 5,
|
||||
"476": 1,
|
||||
"477": 1,
|
||||
"478": 1,
|
||||
"479": 3,
|
||||
"481": 1,
|
||||
"482": 1,
|
||||
"484": 1,
|
||||
"485": 2,
|
||||
"489": 1,
|
||||
"490": 1,
|
||||
"494": 3,
|
||||
"495": 1,
|
||||
"497": 1,
|
||||
"499": 1,
|
||||
"501": 1,
|
||||
"502": 1,
|
||||
"503": 1,
|
||||
"504": 1,
|
||||
"505": 1,
|
||||
"506": 1,
|
||||
"508": 3,
|
||||
"510": 2,
|
||||
"511": 4,
|
||||
"518": 1,
|
||||
"519": 2,
|
||||
"520": 1,
|
||||
"522": 1,
|
||||
"523": 1,
|
||||
"524": 1,
|
||||
"525": 1,
|
||||
"526": 1,
|
||||
"527": 1,
|
||||
"537": 1,
|
||||
"540": 1,
|
||||
"541": 1,
|
||||
"543": 1,
|
||||
"545": 2,
|
||||
"546": 3,
|
||||
"554": 1,
|
||||
"555": 1,
|
||||
"557": 2,
|
||||
"558": 1,
|
||||
"559": 1,
|
||||
"562": 1,
|
||||
"564": 3,
|
||||
"566": 1,
|
||||
"568": 1,
|
||||
"573": 1,
|
||||
"578": 2,
|
||||
"580": 2,
|
||||
"581": 1,
|
||||
"583": 1,
|
||||
"584": 1,
|
||||
"585": 1,
|
||||
"586": 1,
|
||||
"588": 1,
|
||||
"592": 1,
|
||||
"594": 2,
|
||||
"595": 1,
|
||||
"597": 2,
|
||||
"598": 1,
|
||||
"601": 1,
|
||||
"609": 1,
|
||||
"610": 1,
|
||||
"612": 1,
|
||||
"613": 2,
|
||||
"615": 1,
|
||||
"618": 2,
|
||||
"620": 2,
|
||||
"622": 1,
|
||||
"623": 1,
|
||||
"624": 1,
|
||||
"626": 1,
|
||||
"635": 1,
|
||||
"637": 1,
|
||||
"639": 1,
|
||||
"643": 2,
|
||||
"645": 1,
|
||||
"649": 2,
|
||||
"651": 1,
|
||||
"654": 1,
|
||||
"658": 1,
|
||||
"661": 1,
|
||||
"667": 1,
|
||||
"670": 1,
|
||||
"671": 1,
|
||||
"672": 1,
|
||||
"673": 1,
|
||||
"676": 1,
|
||||
"679": 2,
|
||||
"686": 1,
|
||||
"691": 1,
|
||||
"694": 2,
|
||||
"698": 1,
|
||||
"701": 1,
|
||||
"708": 1,
|
||||
"710": 1,
|
||||
"711": 1,
|
||||
"715": 1,
|
||||
"719": 1,
|
||||
"723": 1,
|
||||
"729": 2,
|
||||
"737": 1,
|
||||
"739": 1,
|
||||
"745": 1,
|
||||
"747": 1,
|
||||
"753": 1,
|
||||
"755": 1,
|
||||
"756": 1,
|
||||
"765": 1,
|
||||
"786": 1,
|
||||
"794": 1,
|
||||
"799": 1,
|
||||
"810": 1,
|
||||
"813": 1,
|
||||
"816": 2,
|
||||
"822": 1,
|
||||
"873": 1,
|
||||
"880": 1,
|
||||
"891": 1,
|
||||
"912": 1,
|
||||
"945": 1,
|
||||
"957": 1,
|
||||
"960": 1,
|
||||
"987": 1,
|
||||
"992": 1,
|
||||
"1005": 1,
|
||||
"1035": 1,
|
||||
"1046": 1,
|
||||
"1073": 1,
|
||||
"1096": 1,
|
||||
"1099": 1,
|
||||
"1196": 2,
|
||||
"1233": 1,
|
||||
"1263": 1,
|
||||
"1329": 1,
|
||||
"1597": 1,
|
||||
"1699": 1,
|
||||
"1893": 1,
|
||||
"2244": 1,
|
||||
"2537": 1
|
||||
},
|
||||
"summary": {
|
||||
"min_words": 8,
|
||||
"max_words": 2537,
|
||||
"mean_words": 72.6454133879487,
|
||||
"median_words": 53.0
|
||||
}
|
||||
}
|
||||
31
figures/review_lengths.json
Normal file
31
figures/review_lengths.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"<10": 6,
|
||||
"10-19": 1883,
|
||||
"20-29": 9459,
|
||||
"30-39": 8116,
|
||||
"40-49": 6528,
|
||||
"50-59": 5331,
|
||||
"60-69": 4413,
|
||||
"70-79": 3514,
|
||||
"80-89": 2749,
|
||||
"90-99": 2305,
|
||||
"100-109": 1946,
|
||||
"110-119": 1494,
|
||||
"120-129": 1162,
|
||||
"130-139": 973,
|
||||
"140-149": 865,
|
||||
"150-159": 716,
|
||||
"160-169": 557,
|
||||
"170-179": 475,
|
||||
"180-189": 414,
|
||||
"190-199": 353,
|
||||
"200-219": 551,
|
||||
"220-239": 394,
|
||||
"240-259": 310,
|
||||
"260-279": 208,
|
||||
"280-299": 162,
|
||||
"300-399": 479,
|
||||
"400-499": 145,
|
||||
"500-999": 138,
|
||||
"1000+": 16
|
||||
}
|
||||
20
figures/reviews_attraktionen.json
Normal file
20
figures/reviews_attraktionen.json
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"Sacred Monkey\nForest": 18542,
|
||||
"Uluwatu Temple": 5902,
|
||||
"Sanur Beach": 4526,
|
||||
"Tanah Lot Temple": 4218,
|
||||
"Seminyak Beach": 3761,
|
||||
"Nusa Dua": 3324,
|
||||
"Bali Zoo": 2640,
|
||||
"Mount Batur": 1815,
|
||||
"Ulun Danu Bratan": 1722,
|
||||
"Tirta Gangga": 1557,
|
||||
"Pandawa Beach": 1511,
|
||||
"Jimbaran Bay": 1430,
|
||||
"Double Six Beach": 1323,
|
||||
"Devil Tears": 1263,
|
||||
"Kelingking Beach": 713,
|
||||
"Lempuyang Temple": 596,
|
||||
"Canggu Beach": 555,
|
||||
"Mount Agung": 266
|
||||
}
|
||||
97
figures/simplify_review_lengths.py
Normal file
97
figures/simplify_review_lengths.py
Normal file
@@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Aggregate review length counts into buckets."""
|
||||
from __future__ import annotations

import argparse
import json
from pathlib import Path
from typing import Dict, Iterable, Optional, Tuple
|
||||
|
||||
# A bucket is (lower bound, upper bound, label). Both bounds are inclusive and
# None marks an open end. Optional[...] is used instead of `int | None`
# because this alias is an ordinary assignment evaluated at import time:
# `from __future__ import annotations` only defers annotations, so the PEP 604
# form would raise TypeError on Python versions before 3.10.
Bucket = Tuple[Optional[int], Optional[int], str]


# Default bucketing scheme: width 10 up to 199, width 20 up to 299, then
# progressively wider ranges and an open-ended bucket for 1000 and above.
DEFAULT_BUCKETS: Tuple[Bucket, ...] = (
    (None, 9, "<10"),
    (10, 19, "10-19"),
    (20, 29, "20-29"),
    (30, 39, "30-39"),
    (40, 49, "40-49"),
    (50, 59, "50-59"),
    (60, 69, "60-69"),
    (70, 79, "70-79"),
    (80, 89, "80-89"),
    (90, 99, "90-99"),
    (100, 109, "100-109"),
    (110, 119, "110-119"),
    (120, 129, "120-129"),
    (130, 139, "130-139"),
    (140, 149, "140-149"),
    (150, 159, "150-159"),
    (160, 169, "160-169"),
    (170, 179, "170-179"),
    (180, 189, "180-189"),
    (190, 199, "190-199"),
    (200, 219, "200-219"),
    (220, 239, "220-239"),
    (240, 259, "240-259"),
    (260, 279, "260-279"),
    (280, 299, "280-299"),
    (300, 399, "300-399"),
    (400, 499, "400-499"),
    (500, 999, "500-999"),
    (1000, None, "1000+"),
)
|
||||
|
||||
|
||||
def load_counts(path: Path) -> Dict[int, int]:
    """Read a ``length -> count`` histogram from a JSON file.

    Two layouts are accepted:

    * a flat object such as ``{"8": 1, "9": 5}``;
    * the report written by review_dist.py, where the same mapping is nested
      under the ``"distribution_word_length"`` key.

    Args:
        path: JSON file to read (decoded as UTF-8).

    Returns:
        The histogram with both keys and values converted to ``int``.

    Raises:
        ValueError: If a key or value cannot be converted to ``int``.
    """
    with path.open("r", encoding="utf-8") as handle:
        raw = json.load(handle)

    # Unwrap the review_dist.py report. A flat histogram can never contain
    # this key (it is not an integer), so flat input is unaffected.
    if isinstance(raw, dict) and isinstance(
        raw.get("distribution_word_length"), dict
    ):
        raw = raw["distribution_word_length"]

    return {int(k): int(v) for k, v in raw.items()}
|
||||
|
||||
|
||||
def aggregate(counts: Dict[int, int], buckets: Iterable[Bucket]) -> Dict[str, int]:
    """Sum per-length counts into labeled buckets.

    Each bucket is ``(start, end, label)`` with inclusive bounds; ``None`` as
    ``start`` or ``end`` leaves that side open. A length is credited to the
    first bucket that contains it.

    Args:
        counts: Mapping of length -> number of occurrences.
        buckets: Bucket definitions, checked in the given order.

    Returns:
        Mapping of bucket label -> summed count. Every label is present, with
        0 for buckets that received nothing.

    Raises:
        ValueError: If some length falls into none of the buckets.
    """
    # The buckets are scanned once for the labels and again for every length,
    # so a one-shot iterable (e.g. a generator) must be materialized first;
    # otherwise it is exhausted after the first pass and nothing matches.
    bucket_list = tuple(buckets)

    output: Dict[str, int] = {label: 0 for _, _, label in bucket_list}
    for length, count in counts.items():
        for start, end, label in bucket_list:
            if start is None and end is None:
                # A bucket without any bound has never matched; keep it so.
                continue
            above_start = start is None or length >= start
            below_end = end is None or length <= end
            if above_start and below_end:
                output[label] += count
                break
        else:
            raise ValueError(f"No bucket found for length {length}.")
    return output
|
||||
|
||||
|
||||
def write_output(path: Path, data: Dict[str, int]) -> None:
    """Serialize *data* to *path* as indented UTF-8 JSON with a final newline.

    Non-ASCII characters in labels are written as-is rather than escaped.
    """
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    path.write_text(serialized + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def main() -> int:
    """Run the CLI: read per-length counts, bucket them, write the result.

    Returns:
        0 once the bucketed JSON has been written.
    """
    cli = argparse.ArgumentParser(description="Bucket review length counts.")
    # Both positionals are converted to Path objects by argparse.
    cli.add_argument(
        "input",
        type=Path,
        help="Path to review_lengths.json (mapping of length -> count).",
    )
    cli.add_argument(
        "output",
        type=Path,
        help="Path to write bucketed counts JSON.",
    )
    options = cli.parse_args()

    per_length = load_counts(options.input)
    write_output(options.output, aggregate(per_length, DEFAULT_BUCKETS))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user