# Print the document-term matrix.
# NOTE(review): `dtm` is not created in this part of the script; presumably
# it is built earlier (e.g. with tm::DocumentTermMatrix) -- confirm.
dtm
library(topicmodels)
# Exploratory fit: LDA with 10 topics, default method and control settings.
lda <- LDA(dtm, k = 10)
# Top 10 terms of every topic; the outer parentheses print the assigned value.
(term <- terms(lda, 10))
# Remove uninformative function words from the vocabulary in a single pass.
# The `!` negates the %in% test, so the listed terms are dropped and every
# other column is kept. The second group ("cred" ... "doua") was previously
# filtered without the negation and without the closing bracket, which did
# not parse and would have kept only those five terms.
dtm <- dtm[, !colnames(dtm) %in% c(
  "si", "in", "dl", "dupa", "face", "arata",
  "cred", "spus", "asa", "poate", "doua"
)]
# Documents x terms remaining after the filter.
dim(dtm)
# Prune the vocabulary by mean tf-idf, then drop documents left empty.
# NOTE(review): col_sums()/row_sums() come from slam and nDocs() from tm;
# this assumes both packages are attached earlier in the script -- confirm.
# The code reads dtm$i / dtm$j / dtm$v, i.e. it expects the sparse triplet
# representation (row index, column index, value) of the matrix.

# Distribution of total term counts before pruning.
summary(col_sums(dtm))

# Per-term score: the mean over the non-zero entries of a term of
# (entry value / total of its document row), multiplied by
# log2(number of documents / number of documents containing the term).
term_tfidf <-
  tapply(dtm$v / row_sums(dtm)[dtm$i], dtm$j, mean) *
  log2(nDocs(dtm) / col_sums(dtm > 0))
summary(term_tfidf)

# Keep only terms whose score reaches 0.1 ...
dtm <- dtm[, term_tfidf >= 0.1]
# ... and only documents that still contain at least one kept term.
dtm <- dtm[row_sums(dtm) > 0, ]

# Term-count distribution and matrix size after pruning.
summary(col_sums(dtm))
dim(dtm)
# Number of topics and the random seed shared by all fits below.
k <- 30
SEED <- 2010

# Fit four topic models on the same matrix and collect them in a named list.
# The list is filled one element at a time, in the order VEM, VEM_fixed,
# Gibbs, CTM.
jss_TM <- list()

# LDA with default method; only the seed is set.
jss_TM[["VEM"]] <- LDA(dtm, k = k, control = list(seed = SEED))

# Same as above, but with estimate.alpha switched off.
jss_TM[["VEM_fixed"]] <- LDA(
  dtm,
  k = k,
  control = list(estimate.alpha = FALSE, seed = SEED)
)

# LDA fitted by Gibbs sampling with explicit burnin / thin / iter settings.
jss_TM[["Gibbs"]] <- LDA(
  dtm,
  k = k,
  method = "Gibbs",
  control = list(seed = SEED, burnin = 1000, thin = 100, iter = 1000)
)

# Correlated topic model with custom tolerances for the `var` and `em`
# control entries.
jss_TM[["CTM"]] <- CTM(
  dtm,
  k = k,
  control = list(
    seed = SEED,
    var = list(tol = 10^-4),
    em = list(tol = 10^-3)
  )
)
# Inspect the Gibbs-sampled model: topics() and terms() are both called with
# 10 as their second argument (number of topics per document / terms per
# topic to return).
Topic <- topics(jss_TM$Gibbs, 10)
Terms <- terms(jss_TM$Gibbs, 10)
# Print the first ten columns of the term matrix.
Terms[, seq_len(10)]
# Fit a 10-topic correlated topic model (VEM) and derive its topic graph.
# The fit reuses SEED (defined above with the other model settings) so that
# repeated runs give the same model and therefore the same graph; the
# redundant `model = NULL` default is no longer spelled out.
ctm <- CTM(dtm, k = 10, method = "VEM", control = list(seed = SEED))
# Second argument 0.9 is passed positionally; `and = TRUE` is kept as in the
# original call.
# NOTE(review): build_graph() may need an extra package and may be missing
# from recent topicmodels releases -- confirm against the installed version.
g <- build_graph(ctm, 0.9, and = TRUE)
# No comments:
# Post a Comment