setwd("/Users/subasishdas1/Copy/Rpubs/rpubs/tm_rpubs")
### bt_tw1 <- read.csv("TRB_AM.csv")
bt_tw1 <- read.csv("TRB_AM5.csv")
names(bt_tw1)
dim(bt_tw1)
table(bt_tw1$Year.the.Paper.was.Presented)
tweets <- bt_tw1
all2 <- data.frame(Year = sort(unique(tweets$Year.the.Paper.was.Presented)),  # sort() so Year lines up with tapply()'s level-ordered result
Title = tapply(tweets$Paper.Title, tweets$Year.the.Paper.was.Presented, paste, collapse = ' '))
head(all2)
names(all2)
library(tm)
mydata.corpus <- Corpus(VectorSource(all2$Title))
mydata.corpus <- tm_map(mydata.corpus, content_transformer(function(x) iconv(x, to='UTF-8-MAC', sub='byte')), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, content_transformer(tolower), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, removePunctuation, preserve_intra_word_dashes=TRUE, mc.cores=1)
my_stopwords <- c(stopwords('english'),"the", "due", "are", "not", "for", "this", "and",
"that", "there", "new", "near", "beyond", "time", "from", "been", "both", "than", "set",
"has","now", "until", "all", "use", "two", "ave", "blvd", "east", "between", "ccc", "end", "have", "avenue", "before", "i-us", "i-e", "i-i-", "ames", "belle", "gen", "okeefe", "one", "just", "mac", "being", "i-i-", "left", "right", "west", "when","levels","remaining","based", "issues", "still", "off", "over", "only", "north", "past", "twin", "while", "i-w" , "general" , "harvey", "i-e","i-i-","i-us" , "must", "more", "work","read", "reached", "morrison", "mph", "three","info", "canal", "camp", "la-", "approximately", "amp", "access", "approaching", "forest", "friday", "its", "affect", "after", "within", "what", "various", "under", "toward", "san", "other" , "city", "into", "by", "for", "is", "are", "their", "he", "she", "research", "through", "between", "under", "below", "over", "with", "an", "affect", "nowadays", "present", "important", "significant", "then", "using", "having", "via", "vermont", "some", "rap", "how", "can","task","type","next","limit","areas","real-","samll","method","case", "california","canada","used","effect","variable","state","factors","zone","analysis","test","methods")
mydata.corpus <- tm_map(mydata.corpus, removeWords, my_stopwords, mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, removeNumbers, mc.cores=1)
mydata.dtm <- TermDocumentMatrix(mydata.corpus)
dim(mydata.dtm)
DTM <- DocumentTermMatrix(mydata.corpus)
library(wordcloud)
term.matrix <- TermDocumentMatrix(mydata.corpus)
term.matrix <- as.matrix(term.matrix)
head(term.matrix)
colnames(term.matrix) <- c("TRB09-10", "TRB11-12","TRB13-14")
comparison.cloud(term.matrix,max.words=300, random.order=FALSE)
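# Optional companion view (a sketch): commonality.cloud(), also from the
# wordcloud package, shows the terms shared across all three meeting pairs
# rather than the terms that distinguish them; max.words = 100 is arbitrary.
commonality.cloud(term.matrix, max.words = 100, random.order = FALSE)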
findFreqTerms(mydata.dtm, lowfreq=500)
findAssocs(mydata.dtm, 'traffic', 0.99)
findAssocs(mydata.dtm, 'analysis', 0.99)
findAssocs(mydata.dtm, 'model', 0.98)
findAssocs(mydata.dtm, 'pavement', 0.96)
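# Optional sanity check (a base-R sketch): rank every term by its total
# frequency across the three title documents; the top-20 cutoff is arbitrary.
title_freq <- sort(rowSums(as.matrix(mydata.dtm)), decreasing = TRUE)
head(title_freq, 20)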
tweets <- read.csv("TRB_AM_COMM.csv")
names(tweets)
all2 <- data.frame(Year = sort(unique(tweets$Year.the.Paper.was.Presented)),  # sorted to match tapply()'s ordering
Comm = tapply(tweets$Reviewing.Committee.s.Name, tweets$Year.the.Paper.was.Presented, paste, collapse = ' '))
head(all2)
names(all2)
require(tm)
mydata.corpus <- Corpus(VectorSource(all2$Comm))
mydata.corpus <- tm_map(mydata.corpus, content_transformer(function(x) iconv(x, to='UTF-8-MAC', sub='byte')), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, content_transformer(tolower), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, removePunctuation, preserve_intra_word_dashes=TRUE, mc.cores=1)
my_stopwords <- c(stopwords('english'),"the", "due", "are", "not", "for", "this", "and", "papers","reveiw",
"that", "there", "new", "near", "beyond", "time", "from", "been", "both", "than", "review","subcommittee",
"has","now", "until", "all", "use", "two", "ave", "blvd", "east", "between", "ccc", "end", "have", "avenue", "before", "i-us", "i-e", "i-i-", "ames", "belle", "gen", "okeefe", "one", "just", "mac", "being", "i-i-", "left", "right", "west", "when","levels","remaining","based", "issues", "still", "off", "over", "only", "north", "past", "twin", "while", "i-w" , "general" , "harvey", "i-e","i-i-","i-us" , "must", "more", "work","read", "reached", "morrison", "mph", "three","info", "canal", "camp", "la-", "approximately", "amp", "access", "approaching", "forest", "friday", "its", "affect", "after", "within", "what", "various", "under", "toward", "san", "other" , "city", "into", "by", "for", "is", "are", "their", "he", "she", "research", "through", "between", "under", "below", "over", "with", "an", "affect", "nowadays", "present", "important", "significant", "then", "using", "having", "via", "vermont", "some", "rap", "how", "can", "inc", "transportation", "advanced", "applied" , "asphalt", "associates", "association", "authority", "center", "central", "cities", "college" , "commission", "company", "construction", "consultant" , "consultants" , "consulting" , "corporation", "council" , "county" , "department" ,
"development" , "engineering" , "group", "highway", "icf" , "imperial" , "inc", "innovation", "institute", "international", "kth" ,
"laboratory" ,"llc" ,"los" ,"ltd" , "metropolitan","ministry","national" , "old" , "park" ,"parsons" , "planning" , "polytechnic" ,
"polytechnique" ,"resource", "road" , "royal" ,
"safety" , "santa", "science" ,
"state", "systematics", "systems" , "tech" , "technical" , "technological" , "technology" , "toronto" , "transit" , "transport" , "transportation" , "united" , "universidad",
"universitat", "university","ahd","paper", "ahn", "special","call","ahd","activities","cfp","toledo","geroliminis")
mydata.corpus <- tm_map(mydata.corpus, removeWords, my_stopwords, mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, removeNumbers, mc.cores=1)
mydata.dtm <- TermDocumentMatrix(mydata.corpus)
mydata.dtm
dim(mydata.dtm)
ddtm <- as.matrix(mydata.dtm)
write.csv(ddtm, "comm_name.csv")
DTM <- DocumentTermMatrix(mydata.corpus)
findFreqTerms(mydata.dtm, lowfreq=100)
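# Optional sketch: a sorted bar plot of the most frequent committee-name
# terms, built from the ddtm matrix exported above; the top-15 cutoff is arbitrary.
comm_freq <- sort(rowSums(ddtm), decreasing = TRUE)
barplot(head(comm_freq, 15), las = 2, cex.names = 0.7)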
library(wordcloud)
term.matrix <- TermDocumentMatrix(mydata.corpus)
term.matrix <- as.matrix(term.matrix)
head(term.matrix)
colnames(term.matrix) <- c("TRB09-10", "TRB11-12","TRB13-14")
comparison.cloud(term.matrix,max.words=60, random.order=FALSE)
names(tweets)
all2 <- data.frame(Year = sort(unique(tweets$Year.the.Paper.was.Presented)),  # sorted to match tapply()'s ordering
Code = tapply(tweets$Reviewing.Committee.s.Code, tweets$Year.the.Paper.was.Presented, paste, collapse = ' '))
head(all2)
names(all2)
require(tm)
mydata.corpus <- Corpus(VectorSource(all2$Code))
mydata.corpus <- tm_map(mydata.corpus, content_transformer(function(x) iconv(x, to='UTF-8-MAC', sub='byte')), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, content_transformer(tolower), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, removePunctuation, preserve_intra_word_dashes=TRUE, mc.cores=1)
my_stopwords <- c(stopwords('english'),"the", "due", "are", "not", "for", "this", "and",
"that", "there", "new", "near", "beyond", "time", "from", "been", "both", "than",
"has","now", "until", "all", "use", "two", "ave", "blvd", "east", "between", "ccc", "end", "have", "avenue", "before", "i-us", "i-e", "i-i-", "ames", "belle", "gen", "okeefe", "one", "just", "mac", "being", "i-i-", "left", "right", "west", "when","levels","remaining","based", "issues", "still", "off", "over", "only", "north", "past", "twin", "while", "i-w" , "general" , "harvey", "i-e","i-i-","i-us" , "must", "more", "work","read", "reached", "morrison", "mph", "three","info", "canal", "camp", "la-", "approximately", "amp", "access", "approaching", "forest", "friday", "its", "affect", "after", "within", "what", "various", "under", "toward", "san", "other" , "city", "into", "by", "for", "is", "are", "their", "he", "she", "research", "through", "between", "under", "below", "over", "with", "an", "affect", "nowadays", "present", "important", "significant", "then", "using", "having", "via", "vermont", "some", "rap", "how", "can", "inc", "transportation", "advanced", "applied" , "asphalt", "associates", "association", "authority", "center", "central", "cities", "college" , "commission", "company", "construction", "consultant" , "consultants" , "consulting" , "corporation", "council" , "county" , "department" ,
"development" , "engineering" , "group", "highway", "icf" , "imperial" , "inc", "innovation", "institute", "international", "kth" ,
"laboratory" ,"llc" ,"los" ,"ltd" , "metropolitan","ministry","national" , "old" , "park" ,"parsons" , "planning" , "polytechnic" ,
"polytechnique" ,"resource", "road" , "royal" ,
"safety" , "santa", "science" ,
"state", "systematics", "systems" , "tech" , "technical" , "technological" , "technology" , "toronto" , "transit" , "transport" , "transportation" , "united" , "universidad",
"universitat", "university")
mydata.corpus <- tm_map(mydata.corpus, removeWords, my_stopwords, mc.cores=1)
mydata.dtm <- TermDocumentMatrix(mydata.corpus)
mydata.dtm
dim(mydata.dtm)
ddtm <- as.matrix(mydata.dtm)
write.csv(ddtm, "comm_name.csv")
DTM <- DocumentTermMatrix(mydata.corpus)
findFreqTerms(mydata.dtm, lowfreq=5)
library(wordcloud)
term.matrix <- TermDocumentMatrix(mydata.corpus)
term.matrix <- as.matrix(term.matrix)
head(term.matrix)
colnames(term.matrix) <- c("TRB09-10", "TRB11-12","TRB13-14")
comparison.cloud(term.matrix,max.words=100, random.order=FALSE)
#### for NOLA
require(tm)
mydata.corpus <- Corpus(VectorSource(tweets$Paper.Title))
mydata.corpus <- tm_map(mydata.corpus, content_transformer(function(x) iconv(x, to='UTF-8-MAC', sub='byte')), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, content_transformer(tolower), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, removePunctuation, preserve_intra_word_dashes=TRUE, mc.cores=1)
my_stopwords <- c(stopwords('english'),"the", "due", "are", "not", "for", "this", "and",
"that", "there", "new", "near", "beyond", "time", "from", "been", "both", "than",
"has","now", "until", "all", "use", "two", "ave", "blvd", "east", "between", "ccc", "end", "have", "avenue", "before", "i-us", "i-e", "i-i-", "ames", "belle", "gen", "okeefe", "one", "just", "mac", "being", "i-i-", "left", "right", "west", "when","levels","remaining","based", "issues", "still", "off", "over", "only", "north", "past", "twin", "while", "i-w" , "general" , "harvey", "i-e","i-i-","i-us" , "must", "more", "work","read", "reached", "morrison", "mph", "three","info", "canal", "camp", "la-", "approximately", "amp", "access", "approaching", "forest", "friday", "its", "affect", "after", "within", "what", "various", "under", "toward", "san", "other" , "city", "into", "by", "for", "is", "are", "their", "he", "she", "research", "through", "between", "under", "below", "over", "with", "an", "affect", "nowadays", "present", "important", "significant", "then", "using", "having", "via", "vermont", "some", "rap", "how", "can", "inc", "transportation", "advanced", "applied" , "asphalt", "associates", "association", "authority", "center", "central", "cities", "college" , "commission", "company", "construction", "consultant" , "consultants" , "consulting" , "corporation", "council" , "county" , "department" ,
"development" , "engineering" , "group", "highway", "icf" , "imperial" , "inc", "innovation", "institute", "international", "kth" ,
"laboratory" ,"llc" ,"los" ,"ltd" , "metropolitan","ministry","national" , "old" , "park" ,"parsons" , "planning" , "polytechnic" ,
"polytechnique" ,"resource", "road" , "royal" ,
"safety" , "santa", "science" ,
"state", "systematics", "systems" , "tech" , "technical" , "technological" , "technology" , "toronto" , "transit" , "transport" , "transportation" , "united" , "universidad",
"universitat", "university")
mydata.corpus <- tm_map(mydata.corpus, removeWords, my_stopwords, mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, removeNumbers, mc.cores=1)
# build a term-document matrix
mydata.dtm <- TermDocumentMatrix(mydata.corpus)
mydata.dtm
dim(mydata.dtm)
library(topicmodels)
dtm <- as.DocumentTermMatrix(mydata.dtm)
lda <- LDA(dtm, k = 6) # find 6 topics
term <- terms(lda,4) # first 4 terms of every topic
term
term <- apply(term, MARGIN = 2, paste, collapse = ", ")
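# Optional sketch: per-document topic probabilities from the fitted model;
# posterior() ships with topicmodels and returns a documents-by-topics matrix.
topic_probs <- posterior(lda)$topics
head(round(topic_probs, 3))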
# first topic identified for every document (paper)
require(data.table) # for IDate
topic <- topics(lda, 1)
topics <- data.frame(Year=tweets$Year.the.Paper.was.Presented, topic)
library(ggplot2)
qplot(Year, ..count.., data=topics, geom="density",
fill=term[topic], position="stack")+theme_bw()+
theme(panel.background = element_blank()) +
theme(axis.ticks.x = element_blank())+ theme(axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12))
setwd("/Users/subasishdas1/Copy/Rpubs/rpubs/tm_rpubs")
bt_tw1 <- read.csv("TRB_AM.csv")
head(bt_tw1)
names(bt_tw1)
dim(bt_tw1)
tweets <- subset(bt_tw1, Year.the.Paper.was.Presented >2005)
table(tweets$Year.the.Paper.was.Presented)
dim(tweets)
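# Reproducibility sketch: fixing the RNG seed makes the 3,000-paper draw
# below repeatable; the seed value 2014 is an arbitrary choice.
set.seed(2014)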
tweets2 <- tweets[sample(nrow(tweets), 3000),]
dim(tweets2)
table(tweets2$Year.the.Paper.was.Presented)
## Save the full post-2005 subset (roughly 10,000 papers)
write.csv(tweets, "10000_pap.csv")
names(tweets2)
all2 <- data.frame(Year = sort(unique(tweets2$Year.the.Paper.was.Presented)),  # sorted to match tapply()'s ordering
Abstract = tapply(tweets2$Paper.Abstract, tweets2$Year.the.Paper.was.Presented, paste, collapse = ' '))
head(all2)
names(all2)
dim(all2)
require(tm)
mydata.corpus <- Corpus(VectorSource(all2$Abstract))
mydata.corpus <- tm_map(mydata.corpus, content_transformer(function(x) iconv(x, to='UTF-8-MAC', sub='byte')), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, content_transformer(tolower), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, removePunctuation, preserve_intra_word_dashes=TRUE, mc.cores=1)
my_stopwords <- c(stopwords('english'),"the", "due", "are", "not", "for", "this", "and",
"that", "there", "new", "near", "beyond", "time", "from", "been", "both", "than",
"has","now", "until", "all", "use", "two", "ave", "blvd", "east", "between", "ccc", "end", "have", "avenue", "before", "i-us", "i-e", "i-i-", "ames", "belle", "gen", "okeefe", "one", "just", "mac", "being", "i-i-", "left", "right", "west", "when","levels","remaining","based", "issues", "still", "off", "over", "only", "north", "past", "twin", "they", "alf" ,"found","while", "i-w" , "general" , "harvey", "i-e","i-i-","i-us" , "must", "more", "work","read", "reached", "morrison", "mph", "three","info", "canal", "camp", "la-", "approximately", "amp", "access", "approaching", "forest", "friday", "its", "affect", "after", "within", "what", "various", "under", "toward", "san", "other" , "city", "into", "by", "for", "is", "are", "their", "he", "she", "research", "through", "between", "under", "below", "over", "with", "an", "affect", "nowadays", "present", "important", "significant", "then", "using", "having", "via", "vermont", "some", "rap", "how", "can")
mydata.corpus <- tm_map(mydata.corpus, removeWords, my_stopwords, mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, removeNumbers, mc.cores=1)
mydata.dtm <- TermDocumentMatrix(mydata.corpus)
mydata.dtm
dim(mydata.dtm)
DTM <- DocumentTermMatrix(mydata.corpus)
findFreqTerms(mydata.dtm, lowfreq=2000)
findAssocs(mydata.dtm, 'traffic', 0.98)
findAssocs(mydata.dtm, 'analysis', 0.98)
findAssocs(mydata.dtm, 'model', 0.99)
findAssocs(mydata.dtm, 'evaluation', 0.98)
findAssocs(mydata.dtm, 'pavement', 0.98)
mydata.dtm2 <- removeSparseTerms(mydata.dtm, sparse=0.14)
mydata.dtm2
dim(mydata.dtm2)
inspect(mydata.dtm2[1:7,1:7])
library(slam)
TDM.dense <- as.matrix(mydata.dtm2)
TDM.dense
object.size(mydata.dtm2)
write.csv(TDM.dense, "Abs_2000_sparse.csv")
#### PLOT 1
tdm_BT <- read.csv("Abs_2000_sparse_1.csv")
head(tdm_BT)
dim(tdm_BT)
tdm_BT_1 <- tdm_BT[c(1, 9)]  # keep the Term and Count columns
head(tdm_BT_1)
library(ggplot2)
p <- ggplot(tdm_BT_1, aes(x = reorder(Term, -Count), y = Count))
p <- p + geom_bar(stat = "identity")+theme_bw()
p <- p + theme(axis.text.x=element_text(angle=65, hjust=1))
p + labs(title = "Most Frequent Terms")+xlab("Terms")+
theme(axis.text.x = element_text(size = 12), axis.text.y = element_text(size = 12))
###PLOT 2
library(reshape2)  # melt() for data frames; the older 'reshape' package silently ignores value.name
tdm_BTT <- read.csv("Abs_2000_sparse_2.csv")
tdm_BT_2 <- melt(tdm_BTT[1:8], id.vars = "Term")  # long format: Term, variable, value
head(tdm_BT_2)
ggplot(tdm_BT_2, aes(x = variable, y = Term, fill = value)) +
geom_tile(colour = "white") +
scale_fill_gradient(high="#FF0000" , low="#FFFFFF")+
ylab("")+ xlab("TRB Annual Meeting")+
theme(panel.background = element_blank()) +
theme(axis.ticks.x = element_blank())+ theme(axis.text.y = element_text(size = 12))
### PLOT 3
library(wordcloud)
term.matrix <- TermDocumentMatrix(mydata.corpus)
term.matrix <- as.matrix(term.matrix)
head(term.matrix)
colnames(term.matrix) <- c("TRB2008","TRB2009", "TRB2010","TRB2011","TRB2012",
"TRB2013","TRB2014")
comparison.cloud(term.matrix,max.words=200, random.order=FALSE)
############# wordcloud
mydata.dtm2 <- removeSparseTerms(mydata.dtm, sparse=0.15)
mydata.dtm2
dim(mydata.dtm2)
inspect(mydata.dtm2[1:7,1:7])
library(slam)
TDM.dense <- as.matrix(mydata.dtm2)
TDM.dense
object.size(mydata.dtm2)
object.size(TDM.dense)
inspect(DocumentTermMatrix(mydata.corpus,
list(dictionary = c("traffic", "analysis", "model","evaluation", "pavement"))))
####################################################
write.csv(TDM.dense, "sparse_1.csv")
#### PLOT 1
tdm_BT <- read.csv("sparse_3.csv")
head(tdm_BT)
dim(tdm_BT)
tdm_BT_1 <- tdm_BT[c(1, 9)]  # keep the Term and Count columns
head(tdm_BT_1)
library(ggplot2)
p <- ggplot(tdm_BT_1, aes(x = reorder(Term, -Count), y = Count))
p <- p + geom_bar(stat = "identity")+theme_bw()
p <- p + theme(axis.text.x=element_text(angle=45, hjust=1))
p + labs(title = "Most Frequent Terms")+xlab("Terms")+
theme(axis.text.x = element_text(size = 12), axis.text.y = element_text(size = 12))
###PLOT 2
library(reshape2)
tdm_BTT <- read.csv("sparse_3a.csv")
tdm_BT_2 <- melt(tdm_BTT[1:8], id.vars = "Term")  # long format: Term, variable, value
head(tdm_BT_2)
ggplot(tdm_BT_2, aes(x = variable, y = Term, fill = value)) +
geom_tile(colour = "white") +
scale_fill_gradient(high="#FF0000" , low="#FFFFFF")+
ylab("")+ xlab("TRB Annual Meeting")+
theme(panel.background = element_blank()) +
theme(axis.ticks.x = element_blank())+ theme(axis.text.y = element_text(size = 12))
####PLOT 3
tdm_BT_3 <- tdm_BT[1:8]
head(tdm_BT_3)
rownames(tdm_BT_3) <- tdm_BT_3$Term
tdm_BT_4 <- tdm_BT_3[2:8]
head(tdm_BT_4)
mydata.df.scale <- scale(tdm_BT_4)
d <- dist(mydata.df.scale, method = "euclidean") # distance matrix
fit <- hclust(d, method="ward.D")
plot(fit) # display the dendrogram
rect.hclust(fit, k = 10)
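# Optional sketch: recover the cluster memberships drawn by rect.hclust()
# above, so they can be tabulated or joined back to the term table.
groups <- cutree(fit, k = 10)
table(groups)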
require(tm)
mydata.corpus <- Corpus(VectorSource(tweets2$Paper.Abstract))
mydata.corpus <- tm_map(mydata.corpus, content_transformer(function(x) iconv(x, to='UTF-8-MAC', sub='byte')), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, content_transformer(tolower), mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, removePunctuation, preserve_intra_word_dashes=TRUE, mc.cores=1)
my_stopwords <- c(stopwords('english'),"the", "due", "are", "not", "for", "this", "and",
"that", "there", "new", "near", "beyond", "time", "from", "been", "both", "than",
"has","now", "until", "all", "use", "two", "ave", "blvd", "east", "between", "ccc", "end", "have", "avenue", "before", "i-us", "i-e", "i-i-", "ames", "belle", "gen", "okeefe", "one", "just", "mac", "being", "i-i-", "left", "right", "west", "when","levels","remaining","based", "issues", "still", "off", "over", "only", "north", "past", "twin", "while", "i-w" , "general" , "harvey", "i-e","i-i-","i-us" , "must", "more", "work","read", "reached", "morrison", "mph", "three","info", "canal", "camp", "la-", "approximately", "amp", "access", "approaching", "forest", "friday", "its", "affect", "after", "within", "what", "various", "under", "toward", "san", "other" , "city", "into", "by", "for", "is", "are", "their", "he", "she", "research", "through", "between", "under", "below", "over", "with", "an", "affect", "nowadays", "present", "important", "significant", "then", "using", "having", "via", "vermont", "some", "rap", "how", "can", "inc", "transportation", "advanced", "applied" , "asphalt", "associates", "association", "authority", "center", "central", "cities", "college" , "commission", "company", "construction", "consultant" , "consultants" , "consulting" , "corporation", "council" , "county" , "department" , "such",
"development" , "engineering" , "group", "highway", "icf" , "imperial" , "inc", "innovation", "institute", "international", "kth" ,
"laboratory" ,"llc" ,"los" ,"ltd" , "metropolitan","ministry","national" , "old" , "park" ,"parsons" , "planning" , "polytechnic" ,
"polytechnique" ,"resource", "road" , "royal" ,
"safety" , "santa", "science" ,
"state", "systematics", "systems" , "tech" , "technical" , "technological" , "technology" , "toronto" , "transit" , "transport" , "transportation" , "united" , "universidad",
"universitat", "university", "different", "study", "results", "were","which", "these", "used", "paper", "test", "bus", "proposed")
mydata.corpus <- tm_map(mydata.corpus, removeWords, my_stopwords, mc.cores=1)
mydata.corpus <- tm_map(mydata.corpus, removeNumbers, mc.cores=1)
# build a term-document matrix
mydata.dtm <- TermDocumentMatrix(mydata.corpus)
mydata.dtm
dim(mydata.dtm)
library(topicmodels)
dtm <- as.DocumentTermMatrix(mydata.dtm)
lda <- LDA(dtm, k = 8) # find 8 topics
term <- terms(lda, 6) # first 6 terms of every topic
term
term <- apply(term, MARGIN = 2, paste, collapse = ", ")
# first topic identified for every document (paper)
require(data.table) # for IDate
topic <- topics(lda, 1)
topics <- data.frame(Year = tweets2$Year.the.Paper.was.Presented, topic)  # tweets2, not tweets: the corpus was built from tweets2$Paper.Abstract
library(ggplot2)
qplot(Year, ..count.., data=topics, geom="density",
fill=term[topic], position="stack")+theme_bw()+
theme(panel.background = element_blank()) +
theme(axis.ticks.x = element_blank())+ theme(axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12))
############# wordcloud
library(wordcloud)
tdm <- TermDocumentMatrix(mydata.corpus)
m <- as.matrix(tdm)
v <- sort(rowSums(m),decreasing=TRUE)
d <- data.frame(word = names(v),freq=v)
wordcloud(d$word,d$freq)
pal <- brewer.pal(9,"BuGn")
pal <- pal[-(1:4)]
wordcloud(d$word,d$freq,c(8,.3),2,,FALSE,,.15,pal)
pal <- brewer.pal(6,"Dark2")
pal <- pal[-(1)]
wordcloud(d$word,d$freq,c(8,.3),2,,TRUE,,.15,pal)
#random colors
wordcloud(d$word,d$freq,c(8,.3),2,,TRUE,TRUE,.15,pal)
##### with font #####
wordcloud(d$word,d$freq,c(8,.3),2,,TRUE,,.15,pal,
vfont=c("gothic english","plain"))
wordcloud(d$word,d$freq,c(8,.3),2,100,TRUE,,.15,pal,vfont=c("script","plain"))
wordcloud(d$word,d$freq,c(8,.3),2,100,TRUE,,.15,pal,vfont=c("serif","plain"))
# Conducted by: Subasish Das