Date Mici
Thursday, March 12, 2020
Tuesday, January 9, 2018
Imagini
Faces ML
FaceNet
http://www.dailymail.co.uk/sciencetech/article-3003053/Google-claims-FaceNet-perfected-recognising-human-faces-accurate-99-96-time.html
https://hackernoon.com/building-a-facial-recognition-pipeline-with-deep-learning-in-tensorflow-66e7645015b8
http://aiehive.com/deep-face-recognition-using-deep-convolution-neural-network/
http://cs.wellesley.edu/~vision/slides/Qianli_summary_deep_face_models.pdf
https://people.cs.umass.edu/~elm/papers/LFW_survey.pdf
http://vis-www.cs.umass.edu/lfw/index.html
http://www.kinfacew.com/protocol.html
https://books.google.ro/books?id=oiYmDwAAQBAJ&pg=PA25&lpg=PA25&dq=UB+KinFace&source=bl&ots=9aWvwfh1Na&sig=V01b41MPWnguPv5hvAFSfXcba_c&hl=en&sa=X&ved=0ahUKEwi7lp2LvMrYAhVkDMAKHZyQCfM4ChDoAQgpMAE#v=onepage&q=UB%20KinFace&f=false
http://www1.ece.neu.edu/~yunfu/research/Kinface/Kinface.htm
Databases:
UB KinFace Dataset [19],
https://github.com/visionjo/FIW_KRT
https://web.northeastern.edu/smilelab/RFIW2018/
FaceNet
http://www.dailymail.co.uk/sciencetech/article-3003053/Google-claims-FaceNet-perfected-recognising-human-faces-accurate-99-96-time.html
https://hackernoon.com/building-a-facial-recognition-pipeline-with-deep-learning-in-tensorflow-66e7645015b8
http://aiehive.com/deep-face-recognition-using-deep-convolution-neural-network/
http://cs.wellesley.edu/~vision/slides/Qianli_summary_deep_face_models.pdf
https://people.cs.umass.edu/~elm/papers/LFW_survey.pdf
http://vis-www.cs.umass.edu/lfw/index.html
http://www.kinfacew.com/protocol.html
https://books.google.ro/books?id=oiYmDwAAQBAJ&pg=PA25&lpg=PA25&dq=UB+KinFace&source=bl&ots=9aWvwfh1Na&sig=V01b41MPWnguPv5hvAFSfXcba_c&hl=en&sa=X&ved=0ahUKEwi7lp2LvMrYAhVkDMAKHZyQCfM4ChDoAQgpMAE#v=onepage&q=UB%20KinFace&f=false
http://www1.ece.neu.edu/~yunfu/research/Kinface/Kinface.htm
Databases:
UB KinFace Dataset [19],
Cornell Kinship Dataset [15],
KinFace-I [24], and
KinFace-II [24].
https://web.northeastern.edu/smilelab/RFIW2018/
Monday, October 23, 2017
Magia CINE
Regex filtrare comentarii in Knime.
removeChars($FIELDsau Camp$,"/[\/#!$%\^&\*{}=\-_`~()“”,;\"]/")
Sunday, December 18, 2016
Campania electorala 2016
# Build a tm corpus from plain-text files in Romanian (Windows-1250 encoded).
# NOTE(review): activeDataSet(), setCorpusVariables() and splitTexts() come
# from R Commander + RcmdrPlugin.temis; this script assumes such a session.
corpus <- Corpus(DirSource("E:/Big Data/texte", encoding="windows-1250"),
readerControl=list(language="ro"))
# One empty document-level variable; rows named after the document IDs.
corpusVars <- data.frame(var1=factor(rep("", length(corpus))),
row.names=names(corpus))
activeDataSet("corpusVars")
setCorpusVariables()
# Split each document into 20-paragraph chunks and record the split in the
# corpus-level metadata.
corpus <- splitTexts(corpus, 20)
meta(corpus, type="corpus", tag="split") <- TRUE
# Work on a copy so the displayed corpus keeps its original text.
dtmCorpus <- corpus
dtmCorpus <- tm_map(dtmCorpus, content_transformer(tolower))
# Collapse apostrophes, punctuation, whitespace and control characters into
# single spaces.  NOTE(review): the literal <U+202F>/<U+2009> tokens look
# like narrow/thin spaces flattened by an encoding round-trip -- confirm
# they still match the intended characters.
dtmCorpus <- tm_map(dtmCorpus, content_transformer(function(x)
gsub("(['’\n<U+202F><U+2009>]|[[:punct:]]|[[:space:]]|[[:cntrl:]])+", " ",
x)))
dtmCorpus <- tm_map(dtmCorpus, removeNumbers)
# Document-term matrix; lowercasing was already done above, and only words
# of at least two characters are kept.
dtm <- DocumentTermMatrix(dtmCorpus, control=list(tolower=FALSE,
wordLengths=c(2, Inf)))
rm(dtmCorpus)
library(SnowballC)
# Per-term dictionary: occurrence counts, Romanian stem, stopword flag.
dictionary <- data.frame(row.names=colnames(dtm),
"Occurrences"=col_sums(dtm), "Stemmed.Term"=wordStem(colnames(dtm), "ro"),
"Stopword"=ifelse(colnames(dtm) %in% stopwords("ro"), "Stopword", ""),
stringsAsFactors=FALSE)
# Drop Romanian stopwords, then merge matrix columns that share a stem.
dtm <- dtm[, !colnames(dtm) %in% stopwords("ro")]
dtm <- rollup(dtm, 2, dictionary[colnames(dtm), 2])
attr(dtm, "dictionary") <- dictionary
rm(dictionary)
# Record the language and the processing steps applied, on both objects.
meta(corpus, type="corpus", tag="language") <- attr(dtm, "language") <- "ro"
meta(corpus, type="corpus", tag="processing") <- attr(dtm, "processing") <-
c(lowercase=TRUE, punctuation=TRUE, digits=TRUE, stopwords=TRUE,
stemming=TRUE, customStemming=FALSE, twitter=FALSE, removeHashtags=NA,
removeNames=NA)
corpus
dtm
# Plot a term-correlation graph from a (Term)DocumentMatrix.
#
# Args:
#   x: a DocumentTermMatrix or TermDocumentMatrix (package 'tm').
#   terms: character vector of terms to include (default: 20 random terms).
#   corThreshold: correlations below this value are zeroed out.
#   weighted: forwarded to igraph -- keep correlation weights on edges.
#   diag: forwarded to igraph -- include self-loops from the diagonal.
#   ...: currently unused; reserved for future plot options.
#
# Returns: the igraph graph, invisibly (plotting is the side effect).
graphplot <-
function(x,
terms = sample(Terms(x), 20),
corThreshold = 0.7,
weighted = TRUE,
diag = FALSE,
...)
{
  if (system.file(package = "igraph") == "")
    stop("Plotting requires package 'igraph'.")
  # Work document-wise: transpose a TermDocumentMatrix first.
  m <- if (inherits(x, "TermDocumentMatrix")) t(x) else x
  m <- as.matrix(m[, terms])
  cors <- cor(m)
  # Bug fix: zero out NAs *before* thresholding.  A zero-variance term
  # yields NA correlations, and a logical subscript containing NA makes
  # the thresholding assignment error ("NAs are not allowed in
  # subscripted assignments").  Also renamed 'c' so it no longer shadows
  # base::c().
  cors[is.na(cors)] <- 0
  cors[cors < corThreshold] <- 0
  diag(cors) <- 0
  # Bug fix: 'weighted' and 'diag' were previously accepted but ignored
  # (hard-coded TRUE/FALSE); forward them to igraph.  Defaults match the
  # old hard-coded values, so existing callers are unaffected.
  tmgraph <- graph.adjacency(cors, mode = "undirected", weighted = weighted,
                             diag = diag, add.colnames = NULL,
                             add.rownames = NA)
  plot(tmgraph)
  invisible(tmgraph)
}
library(igraph)
# Terms appearing at least 20 times (outer parens also print the result).
(freq.terms <- findFreqTerms(dtm, lowfreq = 20))
# Fix: spell out 'terms' -- the original passed 'term =' and relied on
# partial argument matching.
agraph <- graphplot(dtm, terms = freq.terms, corThreshold = 0.2)
# Export the correlation graph in GraphML format.
write.graph(agraph, "e:/agraphPoliticAdevarul.graphml", format = "graphml")
Thursday, June 30, 2016
Guardian comments
# Prefer fixed over scientific notation, and print numbers with two
# significant digits (both in the console and in R2HTML output).
options(scipen = 5, digits = 2, R2HTML.format.digits = 2)
# Set a nice color palette for lattice plots; local() keeps the helper
# variable out of the global environment.
local({
  set1 <- RColorBrewer::brewer.pal(8, "Set1")[c(2:1, 3:5, 7:9)]
  lattice.options(default.theme = latticeExtra::custom.theme(
    symbol = set1,
    fill = set1,
    region = RColorBrewer::brewer.pal(n = 11, name = "Spectral")))
})
# Discard objects from a previous run.
# NOTE(review): rm() errors if any of these objects does not exist, and
# 'lengths' collides with a base function name -- confirm these were
# created earlier in the session.
rm(corpus, corpusVars, dtm, lengths)
# First attempt: read the comments from a UTF-8 CSV export ...
corpusDataset <-
read.csv("C:/Users/cristian.chirita/Documents/guardian comments2.csv",
fileEncoding="UTF-8")
rm(lengths)
# ... then re-read them from the Excel workbook via ODBC instead.
# NOTE(review): odbcConnectExcel() works only on 32-bit R on Windows --
# verify the session architecture.
library(RODBC)
channel <-
odbcConnectExcel("C:/Users/cristian.chirita/Documents/guardian comments2.xls")
corpusDataset <- sqlQuery(channel=channel,
"select * from [guardian comments2$]")
odbcCloseAll()
# Column "p" holds the comment text; the remaining columns are metadata.
corpus <- Corpus(DataframeSource(corpusDataset["p"]), readerControl=list(language="en"))
corpusVars <- corpusDataset[!names(corpusDataset) == "p"]
# Keep only the author as a document-level variable.
corpusVars <- corpusVars[c("Autor")]
# activeDataSet()/setCorpusVariables() come from R Commander +
# RcmdrPlugin.temis; this assumes such a session.
activeDataSet("corpusVars")
setCorpusVariables()
# Work on a copy so the displayed corpus keeps its original text.
dtmCorpus <- corpus
dtmCorpus <- tm_map(dtmCorpus, content_transformer(tolower))
# Collapse apostrophes, punctuation, whitespace and control characters into
# single spaces.  NOTE(review): the '?' in the character class looks like a
# mis-encoded right single quote ('’') -- confirm against the source post.
dtmCorpus <- tm_map(dtmCorpus, content_transformer(function(x) gsub("(['?\n<U+202F><U+2009>]|[[:punct:]]|[[:space:]]|[[:cntrl:]])+", " ", x)))
dtmCorpus <- tm_map(dtmCorpus, removeNumbers)
# Document-term matrix; lowercasing already done above, words >= 2 chars.
dtm <- DocumentTermMatrix(dtmCorpus, control=list(tolower=FALSE, wordLengths=c(2, Inf)))
rm(dtmCorpus)
# Per-term dictionary (no stemming for English): counts + stopword flag.
dictionary <- data.frame(row.names=colnames(dtm), "Occurrences"=col_sums(dtm), "Stopword"=ifelse(colnames(dtm) %in% stopwords("en"), "Stopword", ""), stringsAsFactors=FALSE)
# Drop English stopwords from the matrix.
dtm <- dtm[, !colnames(dtm) %in% stopwords("en")]
attr(dtm, "dictionary") <- dictionary
rm(dictionary)
# Record the language and the processing steps applied, on both objects.
meta(corpus, type="corpus", tag="language") <- attr(dtm, "language") <- "en"
meta(corpus, type="corpus", tag="processing") <- attr(dtm, "processing") <- c(lowercase=TRUE, punctuation=TRUE, digits=TRUE, stopwords=TRUE, stemming=FALSE, customStemming=FALSE,
twitter=FALSE, removeHashtags=NA, removeNames=NA)
corpus
dtm
library(topicmodels)
#lda <- LDA(dtm, k = 10) # find 10 topics (disabled)
library(qdap)
# Round-trip the DTM through a corpus to obtain a TermDocumentMatrix.
mytdm <- as.Corpus(dtm)
my_tdm <- TermDocumentMatrix(mytdm)
inspect(my_tdm)
# Terms occurring at least 10 times (outer parens also print the result).
(freq.terms <- findFreqTerms(my_tdm, lowfreq = 10))
# Total frequency per term, keeping only terms seen 25+ times.
term.freq <- rowSums(as.matrix(my_tdm))
term.freq <- subset(term.freq, term.freq >= 25)
df <- data.frame(term = names(term.freq), freq = term.freq)
library(ggplot2)
# Horizontal bar chart of the frequent terms.
ggplot(df, aes(x = term, y = freq)) + geom_bar(stat = "identity") +
xlab("Terms") + ylab("Count") + coord_flip()
# Plot a term-correlation graph from a (Term)DocumentMatrix.
#
# Args:
#   x: a DocumentTermMatrix or TermDocumentMatrix (package 'tm').
#   terms: character vector of terms to include (default: 20 random terms).
#   corThreshold: correlations below this value are zeroed out.
#   weighted: forwarded to igraph -- keep correlation weights on edges.
#   diag: forwarded to igraph -- include self-loops from the diagonal.
#   ...: currently unused; reserved for future plot options.
#
# Returns: the igraph graph, invisibly (plotting is the side effect).
graphplot <-
function(x,
terms = sample(Terms(x), 20),
corThreshold = 0.7,
weighted = TRUE,
diag = FALSE,
...)
{
  if (system.file(package = "igraph") == "")
    stop("Plotting requires package 'igraph'.")
  # Work document-wise: transpose a TermDocumentMatrix first.
  m <- if (inherits(x, "TermDocumentMatrix")) t(x) else x
  m <- as.matrix(m[, terms])
  cors <- cor(m)
  # Bug fix: zero out NAs *before* thresholding.  A zero-variance term
  # yields NA correlations, and a logical subscript containing NA makes
  # the thresholding assignment error ("NAs are not allowed in
  # subscripted assignments").  Also renamed 'c' so it no longer shadows
  # base::c().
  cors[is.na(cors)] <- 0
  cors[cors < corThreshold] <- 0
  diag(cors) <- 0
  # Bug fix: 'weighted' and 'diag' were previously accepted but ignored
  # (hard-coded TRUE/FALSE); forward them to igraph.  Defaults match the
  # old hard-coded values, so existing callers are unaffected.
  tmgraph <- graph.adjacency(cors, mode = "undirected", weighted = weighted,
                             diag = diag, add.colnames = NULL,
                             add.rownames = NA)
  plot(tmgraph)
  invisible(tmgraph)
}
library(igraph)
# Terms appearing at least 20 times (outer parens also print the result).
(freq.terms <- findFreqTerms(dtm, lowfreq = 20))
# Fix: spell out 'terms' -- the original passed 'term =' and relied on
# partial argument matching.
agraph <- graphplot(dtm, terms = freq.terms, corThreshold = 0.2)
# Export the correlation graph in GraphML format.
write.graph(agraph, "e:/agraphGuardian1.graphml", format = "graphml")
Thursday, April 21, 2016
Perceptia Siriei in 5000 de comentarii Adevarul.ro
# Terms appearing at least 30 times (outer parens also print the result).
(freq.terms <- findFreqTerms(dtm, lowfreq = 30))
# Fix: spell out 'terms' -- the original passed 'term =' and relied on
# partial argument matching.  The original also repeated these two lines
# verbatim, recomputing the same graph and overwriting the same file; the
# redundant second run is dropped.
agraph <- graphplot(dtm, terms = freq.terms, corThreshold = 0.1)
# Export the correlation graph in GraphML format.
write.graph(agraph, "e:/agraphsiria.graphml", format = "graphml")
Subscribe to:
Comments (Atom)











