Compare commits

...

3 Commits

16 changed files with 223226 additions and 105762 deletions

1
.gitignore vendored
View File

@@ -3,3 +3,4 @@
__pycache__/ __pycache__/
**.bertopic **.bertopic
history*.json history*.json
model.pkl

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

118440
bertopic_lowprep.ipynb Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1207,11 +1207,11 @@
"topics": [ "topics": [
{ {
"topic_id": 2, "topic_id": 2,
"probability": 0.56 "probability": 0.55
} }
], ],
"dominant_topic": 2, "dominant_topic": 2,
"dominant_probability": 0.56 "dominant_probability": 0.55
}, },
{ {
"document_id": 93, "document_id": 93,
@@ -122923,11 +122923,11 @@
}, },
{ {
"topic_id": 2, "topic_id": 2,
"probability": 0.41 "probability": 0.4
} }
], ],
"dominant_topic": 2, "dominant_topic": 2,
"dominant_probability": 0.41 "dominant_probability": 0.4
}, },
{ {
"document_id": 9489, "document_id": 9489,
@@ -216167,11 +216167,11 @@
}, },
{ {
"topic_id": 2, "topic_id": 2,
"probability": 0.5 "probability": 0.51
} }
], ],
"dominant_topic": 2, "dominant_topic": 2,
"dominant_probability": 0.5 "dominant_probability": 0.51
}, },
{ {
"document_id": 16639, "document_id": 16639,
@@ -238519,11 +238519,11 @@
"topics": [ "topics": [
{ {
"topic_id": 2, "topic_id": 2,
"probability": 0.6 "probability": 0.61
} }
], ],
"dominant_topic": 2, "dominant_topic": 2,
"dominant_probability": 0.6 "dominant_probability": 0.61
}, },
{ {
"document_id": 18375, "document_id": 18375,
@@ -383083,11 +383083,11 @@
"topics": [ "topics": [
{ {
"topic_id": 0, "topic_id": 0,
"probability": 0.54 "probability": 0.55
} }
], ],
"dominant_topic": 0, "dominant_topic": 0,
"dominant_probability": 0.54 "dominant_probability": 0.55
}, },
{ {
"document_id": 29573, "document_id": 29573,
@@ -446754,7 +446754,7 @@
"probability": 0.37 "probability": 0.37
} }
], ],
"dominant_topic": 0, "dominant_topic": 2,
"dominant_probability": 0.37 "dominant_probability": 0.37
}, },
{ {
@@ -544884,6 +544884,10 @@
{ {
"topic_id": 0, "topic_id": 0,
"probability": 0.38 "probability": 0.38
},
{
"topic_id": 1,
"probability": 0.35
} }
], ],
"dominant_topic": 0, "dominant_topic": 0,
@@ -596780,10 +596784,6 @@
{ {
"topic_id": 0, "topic_id": 0,
"probability": 0.43 "probability": 0.43
},
{
"topic_id": 2,
"probability": 0.35
} }
], ],
"dominant_topic": 0, "dominant_topic": 0,
@@ -672275,11 +672275,11 @@
"topics": [ "topics": [
{ {
"topic_id": 2, "topic_id": 2,
"probability": 0.42 "probability": 0.41
} }
], ],
"dominant_topic": 2, "dominant_topic": 2,
"dominant_probability": 0.42 "dominant_probability": 0.41
}, },
{ {
"document_id": 52182, "document_id": 52182,

BIN
processed_texts_lowprep.pkl Normal file

Binary file not shown.

BIN
processed_texts_top2vec.pkl Normal file

Binary file not shown.

1589
top2vec.ipynb Normal file

File diff suppressed because it is too large Load Diff

BIN
top2vec/corpus.pkl Normal file

Binary file not shown.

BIN
top2vec/dictionary.pkl Normal file

Binary file not shown.

BIN
top2vec/tokens.pkl Normal file

Binary file not shown.