# http://quantlet.de/index.php?p=info
# https://eight2late.wordpress.com/2015/12/02/a-gentle-introduction-to-network-graphs-using-r-and-gephi/
# Note: use the author column instead of `filenames` when building the key
# file. These statements are kept as a comment only, because `corpusVars` and
# `m` do not exist yet at this point; the same statements run further down,
# after the document-term matrix has been built.
#   autori <- corpusVars$Autor
#   autori
#   filekey <- cbind(rownames(m), autori)
# Prefer fixed to scientific notation
options(scipen = 5)
# Print numbers with two significant digits
options(digits = 2)
options(R2HTML.format.digits = 2)
# Set a nice color palette for plots.
# "Set1" is requested with 9 colours because the index vector below reaches
# element 9; asking for only 8 colours made the last selected colour NA.
lattice.options(
  default.theme = latticeExtra::custom.theme(
    symbol = RColorBrewer::brewer.pal(9, "Set1")[c(2:1, 3:5, 7:9)],
    fill = RColorBrewer::brewer.pal(9, "Set1")[c(2:1, 3:5, 7:9)],
    region = RColorBrewer::brewer.pal(n = 11, name = "Spectral")
  )
)
# Drop any workspace object called `lengths`.
# NOTE(review): presumably avoids masking base::lengths() - confirm; this
# warns when no such object exists.
rm(lengths)
# Read every row of Sheet1 from the Excel workbook over ODBC, then close all
# open ODBC channels.
library(RODBC)
channel <- odbcConnectExcel(
  "C:/Users/c.c/Documents/DateKnime/adevarulCOMENTARII_Siriamartie21.xls"
)
corpusDataset <- sqlQuery(channel = channel, "select * from [Sheet1$]")
odbcCloseAll()

# The "TextComentariu" column supplies the document texts (language "ro").
corpus <- Corpus(
  DataframeSource(corpusDataset["TextComentariu"]),
  readerControl = list(language = "ro")
)

# Every other column is kept as a per-document variable. The data set is made
# active before its names are passed through make.names(), and the variables
# are then attached to the corpus with setCorpusVariables().
corpusVars <- corpusDataset[names(corpusDataset) != "TextComentariu"]
activeDataSet("corpusVars")
names(corpusVars) <- make.names(names(corpusVars))
setCorpusVariables()
# Build the document-term matrix from a cleaned working copy of the corpus;
# `corpus` itself keeps the original texts.
dtmCorpus <- corpus
dtmCorpus <- tm_map(dtmCorpus, content_transformer(tolower))
# Collapse every run of apostrophes, punctuation, whitespace and control
# characters into a single space. U+202F (narrow no-break space) and U+2009
# (thin space) are written as \u escapes: the literal text "<U+202F><U+2009>"
# inside the bracket expression made the characters U, F, 2, 0 and 9 act as
# separators instead of matching those two spaces.
# NOTE(review): the "?" in the bracket expression may be another mangled
# character (possibly a typographic apostrophe) - confirm against the
# generator's original pattern.
dtmCorpus <- tm_map(dtmCorpus, content_transformer(function(x)
  gsub("(['?\n\u202F\u2009]|[[:punct:]]|[[:space:]]|[[:cntrl:]])+", " ",
       x)))
dtmCorpus <- tm_map(dtmCorpus, removeNumbers)
# Texts are already lower-cased above, hence tolower=FALSE; only terms of at
# least two characters are kept.
dtm <- DocumentTermMatrix(dtmCorpus, control = list(tolower = FALSE,
                                                    wordLengths = c(2, Inf)))
rm(dtmCorpus)
# Dictionary of all terms: occurrence counts plus a flag marking Romanian
# stopwords. It is built before the stopword columns are dropped from `dtm`.
dictionary <- data.frame(row.names = colnames(dtm),
                         "Occurrences" = col_sums(dtm),
                         "Stopword" = ifelse(colnames(dtm) %in%
                                               stopwords("ro"),
                                             "Stopword", ""),
                         stringsAsFactors = FALSE)
dtm <- dtm[, !colnames(dtm) %in% stopwords("ro")]
attr(dtm, "dictionary") <- dictionary
rm(dictionary)
# Record the language and the processing steps on both the corpus metadata
# and the matrix attributes.
meta(corpus, type = "corpus", tag = "language") <- attr(dtm, "language") <- "ro"
meta(corpus, type = "corpus", tag = "processing") <- attr(dtm, "processing") <-
  c(lowercase = TRUE, punctuation = TRUE, digits = TRUE, stopwords = TRUE,
    stemming = FALSE, customStemming = FALSE, twitter = FALSE,
    removeHashtags = NA, removeNames = NA)
# Print summaries of both objects.
corpus
dtm
# Export the document-term matrix as a dense matrix.
m <- as.matrix(dtm)
write.csv(m, file = "E:/dtmEight2Late.csv")
# Display the corpus variables with showData().
library(relimp, pos = 21)
showData(corpusVars, placement = '-20+200', font = getRcmdr('logFont'),
         maxwidth = 80, maxheight = 30)
# Key file pairing each row name of `m` with the comment's author.
# An earlier version built this key from `filenames`, which is not defined
# anywhere in this script and stopped the run with an error; the author
# column is used instead.
autori <- corpusVars$Autor
autori
filekey <- cbind(rownames(m), autori)
write.csv(filekey, "e:/filekey.csv")
# Pairwise cosine similarity between the rows of a numeric matrix.
#
# Args:
#   x: numeric matrix; every row is treated as one vector.
# Returns:
#   A "dist" object (built with as.dist) holding the cosine similarity of
#   every pair of rows. Pairs that involve an all-zero row get similarity 0.
cosineSim <- function(x){
  row_norms <- sqrt(rowSums(x^2))
  sims <- tcrossprod(x) / tcrossprod(row_norms)
  # A row without any non-zero entry has norm 0, so the division above gives
  # 0/0 = NaN for it. Report 0 instead, so that later steps such as max()
  # are not poisoned by NaN values.
  empty_rows <- which(row_norms == 0)
  sims[empty_rows, ] <- 0
  sims[, empty_rows] <- 0
  as.dist(sims)
}
# Cosine similarity between all pairs of documents (rows of `m`); the full
# matrix is saved first.
cs <- cosineSim(m)
write.csv(as.matrix(cs), file = "E:/csEight2Late.csv")
# Turn the similarities into a sparse adjacency matrix: every value below
# half of the largest similarity is set to 0, the rest is rounded to three
# decimals.
cs[cs < max(cs) / 2] <- 0
cs <- round(cs, 3)
# The path previously lacked the slash after the drive letter
# ("e:AdjacencyMatrix.csv"), which is relative to the current directory on
# that drive rather than its root like the other output files.
write.csv(as.matrix(cs), file = "e:/AdjacencyMatrix.csv")
# Load an adjacency matrix from a CSV file picked interactively; the first
# column supplies the row names and column names are kept verbatim.
# NOTE(review): presumably this is the AdjacencyMatrix.csv written above -
# confirm.
dat <- read.csv(file.choose(), header = TRUE, row.names = 1,
                check.names = FALSE)
m1 <- as.matrix(dat) # coerces the data set as a matrix
# weighted = TRUE creates an edge for every non-zero entry and stores the
# entry as the edge attribute "weight". With weighted = NULL the entries are
# read as edge counts, which does not suit fractional similarity values.
g <- graph.adjacency(m1, mode = "undirected", weighted = TRUE) # 'igraph object'
g
# http://www.slideshare.net/rdatamining/text-mining-with-r-an-analysis-of-twitter-data
# http://www.n3labs.com/pdf/rank-co-occur.pdf
# https://cran.r-project.org/web/packages/cooccur/cooccur.pdf
# https://eight2late.wordpress.com/2015/09/29/a-gentle-introduction-to-topic-modeling-using-r/
# http://www.rdatamining.com/examples/text-mining
# http://www.linguisticdna.org/2015/09/10/proximity-data-ii-co-occurrence-and-distance-measurements/
# https://eight2late.wordpress.com/2015/05/27/a-gentle-introduction-to-text-mining-using-r/
# http://faculty.washington.edu/jwilker/CAP/R_Sample_Script.R
# No comments:
# Post a Comment