mirror of
https://github.com/marvinscham/masterthesis-playground.git
synced 2025-12-06 02:00:50 +01:00
Refine preprocessing, add LDA
This commit is contained in:
53
bali_ner.json
Normal file
53
bali_ner.json
Normal file
@@ -0,0 +1,53 @@
|
||||
[
|
||||
"ubud",
|
||||
"kuta",
|
||||
"seminyak",
|
||||
"canggu",
|
||||
"sanur",
|
||||
"denpasar",
|
||||
"jimbaran",
|
||||
"lovina",
|
||||
"amed",
|
||||
"sidemen",
|
||||
"uluwatu",
|
||||
"nusa",
|
||||
"legian",
|
||||
"tabanan",
|
||||
"bedugul",
|
||||
"pemuteran",
|
||||
"tanah",
|
||||
"besakih",
|
||||
"goa",
|
||||
"tirta",
|
||||
"tegallalang",
|
||||
"lempuyang",
|
||||
"agung",
|
||||
"batur",
|
||||
"bratan",
|
||||
"sekumpul",
|
||||
"munduk",
|
||||
"batubulan",
|
||||
"celuk",
|
||||
"tegenungan",
|
||||
"gitgit",
|
||||
"singaraja",
|
||||
"padang",
|
||||
"kerobokan",
|
||||
"penida",
|
||||
"lembongan",
|
||||
"ceningan",
|
||||
"garuda",
|
||||
"ulun",
|
||||
"bajra",
|
||||
"kintamani",
|
||||
"taman",
|
||||
"saraswati",
|
||||
"pandawa",
|
||||
"melasti",
|
||||
"dreamland",
|
||||
"balangan",
|
||||
"bingin",
|
||||
"suluban",
|
||||
"menjangan",
|
||||
"jatiluwih"
|
||||
]
|
||||
137757
bertopic.ipynb
137757
bertopic.ipynb
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
3885
bertopic/visualization.html
Normal file
3885
bertopic/visualization.html
Normal file
File diff suppressed because one or more lines are too long
BIN
lda_output/lda_corpus.pkl
Normal file
BIN
lda_output/lda_corpus.pkl
Normal file
Binary file not shown.
BIN
lda_output/lda_dictionary.gensim
Normal file
BIN
lda_output/lda_dictionary.gensim
Normal file
Binary file not shown.
BIN
lda_output/lda_model.gensim
Normal file
BIN
lda_output/lda_model.gensim
Normal file
Binary file not shown.
BIN
lda_output/lda_model.gensim.expElogbeta.npy
Normal file
BIN
lda_output/lda_model.gensim.expElogbeta.npy
Normal file
Binary file not shown.
BIN
lda_output/lda_model.gensim.id2word
Normal file
BIN
lda_output/lda_model.gensim.id2word
Normal file
Binary file not shown.
BIN
lda_output/lda_model.gensim.state
Normal file
BIN
lda_output/lda_model.gensim.state
Normal file
Binary file not shown.
41
lda_output/lda_vis_3_topics.html
Normal file
41
lda_output/lda_vis_3_topics.html
Normal file
File diff suppressed because one or more lines are too long
730461
lda_output/topic_to_reviews.json
Normal file
730461
lda_output/topic_to_reviews.json
Normal file
File diff suppressed because one or more lines are too long
3
lda_output/topics.txt
Normal file
3
lda_output/topics.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
(0, '0.191*"temple" + 0.102*"view" + 0.079*"sunset" + 0.061*"cliff" + 0.041*"uluwatu" + 0.031*"dance" + 0.030*"kecak_dance" + 0.027*"tourist" + 0.015*"hour" + 0.013*"sun"')
|
||||
(1, '0.052*"sea" + 0.041*"ocean" + 0.038*"guide" + 0.036*"bit" + 0.033*"water" + 0.031*"location" + 0.027*"beach" + 0.025*"wave" + 0.021*"day" + 0.014*"rock"')
|
||||
(2, '0.174*"monkey" + 0.046*"time" + 0.030*"people" + 0.028*"lot" + 0.026*"visit" + 0.022*"glass" + 0.016*"sunglass" + 0.016*"photo" + 0.015*"trip" + 0.014*"day"')
|
||||
BIN
processed_texts.pkl
Normal file
BIN
processed_texts.pkl
Normal file
Binary file not shown.
1
stopwords-en.json
Normal file
1
stopwords-en.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user