Wednesday, April 13, 2016

Acelasi Corpus cu Qdap

http://trinker.github.io/qdap/vignettes/qdap_vignette.html#import_export
# Explore an existing corpus with qdap.
# NOTE(review): `corpus` is not created in this script; it is assumed to
# already exist in the session (presumably a tm corpus) -- confirm.
library(qdap)

# Data-frame view of the corpus; everything below uses its `docs` and
# `text` columns.
qdap_dat <- as.data.frame(corpus)
qview(qdap_dat)
head(corpus)

# Sentence-level split of the text column. The result is kept because the
# word-correlation step further down needs the `tot` (turn of talk) column
# that sentSplit() adds.
split_dat <- sentSplit(qdap_dat, "text", stem = TRUE)
head(split_dat)

# Bag of words for the whole text, then separately for each document.
bag_o_words(qdap_dat$text)
by(qdap_dat$text, qdap_dat$docs, bag_o_words)

# Bigrams, collapsed into space-separated strings.
out <- ngrams(qdap_dat$text, 2)
lapply(
  out[["all_n"]],
  function(grams) vapply(grams, paste, character(1), collapse = " ")
)

# Text with the Top200Words stopword list removed.
rm_stopwords(qdap_dat$text, Top200Words)

# Word association around the match terms, with extra terms highlighted.
ms <- c(" Siria ", "Rusia", "teroristi")
et <- c(" Turcia", " americani")
word_associate(
  qdap_dat$text,
  qdap_dat$docs,
  match.string = ms,
  wordcloud = TRUE,
  proportional = TRUE,
  network.plot = TRUE,
  nw.label.proportional = TRUE,
  extra.terms = et,
  cloud.legend = c("A", "B", "C"),
  title.color = "blue",
  cloud.colors = c("red", "purple", "gray70")
)

# Terms in the text that match the given stems.
term_match(
  text.var = qdap_dat$text,
  terms = qcv(siria, sirieni),
  return.list = FALSE
)
term_match(qdap_dat$text, "america", return.list = FALSE)

# Dissimilarity between documents.
with(qdap_dat, Dissimilarity(text, docs, method = "minkowski"))

# Documents that mention either country. `%in%` would compare each whole
# document text against the single words and match nothing, so a pattern
# search is used instead.
# NOTE(review): assumes "documents mentioning Siria or Rusia" was the
# intent -- confirm.
dat <- qdap_dat[grepl("Siria|Rusia", qdap_dat$text), ]
dat

# Lexical diversity per document: text variable first, grouping second.
(div.mod <- with(qdap_dat, diversity(text, docs)))
plot(div.mod, low = "yellow", grid = FALSE, values = TRUE)

# Word correlations. The grouping factor must exist before word_cor() is
# called and must be the same length as the text it groups, so both come
# from the sentence-split data.
words <- c("Siria", "rusia", "america", "turcia", "romania")
x <- factor(with(split_dat, paste(text, pad(TOT(tot)), sep = "|")))
word_cor(split_dat$text, x, words, r = .5)

word_network_plot(
  text.var = qdap_dat$text,
  stopwords = NULL,
  label.cex = .95
)
 
 

No comments:

Post a Comment