Compare commits

...

2 Commits

Author SHA1 Message Date
3e5669d213 Updated BERTopic, added lowprep version 2025-06-20 02:57:35 +02:00
a9343cd79b Added Perplexity to LDA 2025-06-20 02:56:31 +02:00
10 changed files with 212652 additions and 97337 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

117841
bertopic_lowprep.ipynb Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1207,11 +1207,11 @@
"topics": [
{
"topic_id": 2,
"probability": 0.56
"probability": 0.55
}
],
"dominant_topic": 2,
"dominant_probability": 0.56
"dominant_probability": 0.55
},
{
"document_id": 93,
@@ -122923,11 +122923,11 @@
},
{
"topic_id": 2,
"probability": 0.41
"probability": 0.4
}
],
"dominant_topic": 2,
"dominant_probability": 0.41
"dominant_probability": 0.4
},
{
"document_id": 9489,
@@ -216167,11 +216167,11 @@
},
{
"topic_id": 2,
"probability": 0.5
"probability": 0.51
}
],
"dominant_topic": 2,
"dominant_probability": 0.5
"dominant_probability": 0.51
},
{
"document_id": 16639,
@@ -238519,11 +238519,11 @@
"topics": [
{
"topic_id": 2,
"probability": 0.6
"probability": 0.61
}
],
"dominant_topic": 2,
"dominant_probability": 0.6
"dominant_probability": 0.61
},
{
"document_id": 18375,
@@ -383083,11 +383083,11 @@
"topics": [
{
"topic_id": 0,
"probability": 0.54
"probability": 0.55
}
],
"dominant_topic": 0,
"dominant_probability": 0.54
"dominant_probability": 0.55
},
{
"document_id": 29573,
@@ -446754,7 +446754,7 @@
"probability": 0.37
}
],
"dominant_topic": 0,
"dominant_topic": 2,
"dominant_probability": 0.37
},
{
@@ -544884,6 +544884,10 @@
{
"topic_id": 0,
"probability": 0.38
},
{
"topic_id": 1,
"probability": 0.35
}
],
"dominant_topic": 0,
@@ -596780,10 +596784,6 @@
{
"topic_id": 0,
"probability": 0.43
},
{
"topic_id": 2,
"probability": 0.35
}
],
"dominant_topic": 0,
@@ -672275,11 +672275,11 @@
"topics": [
{
"topic_id": 2,
"probability": 0.42
"probability": 0.41
}
],
"dominant_topic": 2,
"dominant_probability": 0.42
"dominant_probability": 0.41
},
{
"document_id": 52182,

BIN
processed_texts_lowprep.pkl Normal file

Binary file not shown.