## ----CAP394_books_libs, message=FALSE------------------------------------
# data.table supplies melt(); ggplot2 draws the final chart.
library(data.table)
library(ggplot2)

## ----CAP394_books_readdata1----------------------------------------------
# Load the book catalogue, keeping text columns as character vectors.
data <- read.csv(
  "Data/OldPubs/libgenbooks.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

## ----CAP394_books_readdata2----------------------------------------------
str(data)

## ----CAP394_books_count1-------------------------------------------------
# Each filtered* table holds the rows whose Title matches the keyword
# (case-insensitive); nrow() reports how many books matched.
filteredDS <- data[grep("Data Science", data$Title, ignore.case = TRUE), ]
nrow(filteredDS)

## ----CAP394_books_count2-------------------------------------------------
filteredDM <- data[grep("Data Mining", data$Title, ignore.case = TRUE), ]
nrow(filteredDM)

## ----CAP394_books_count3-------------------------------------------------
filteredBD <- data[grep("Big Data", data$Title, ignore.case = TRUE), ]
nrow(filteredBD)

## ----CAP394_books_count4-------------------------------------------------
filteredAI <- data[
  grep("Artificial Intelligence", data$Title, ignore.case = TRUE),
]
nrow(filteredAI)

## ----CAP394_books_count5-------------------------------------------------
filteredNN <- data[grep("Neural Network", data$Title, ignore.case = TRUE), ]
nrow(filteredNN)

## ----CAP394_books_count6-------------------------------------------------
filteredML <- data[grep("Machine Learning", data$Title, ignore.case = TRUE), ]
nrow(filteredML)

## ----CAP394_books_table--------------------------------------------------
table(filteredDS$Year)

## ----CAP394_books_tables-------------------------------------------------
# Per-year counts as a two-column data frame: Year plus the keyword's count.
DSFrequency <- as.data.frame(table(filteredDS$Year))
names(DSFrequency) <- c("Year", "Data Science")
# as.data.frame(table(...)) yields Year as a factor; convert through the
# level labels so the real year values are recovered, not the factor codes.
DSFrequency$Year <- as.numeric(levels(DSFrequency$Year))[DSFrequency$Year]
DSFrequency

## ----CAP394_books_tablesasdf---------------------------------------------
# Same three steps for every remaining keyword.
DMFrequency <- as.data.frame(table(filteredDM$Year))
names(DMFrequency) <- c("Year", "Data Mining")
DMFrequency$Year <- as.numeric(levels(DMFrequency$Year))[DMFrequency$Year]

BDFrequency <- as.data.frame(table(filteredBD$Year))
names(BDFrequency) <- c("Year", "Big Data")
BDFrequency$Year <- as.numeric(levels(BDFrequency$Year))[BDFrequency$Year]

AIFrequency <- as.data.frame(table(filteredAI$Year))
names(AIFrequency) <- c("Year", "Artificial Intelligence")
AIFrequency$Year <- as.numeric(levels(AIFrequency$Year))[AIFrequency$Year]

NNFrequency <- as.data.frame(table(filteredNN$Year))
names(NNFrequency) <- c("Year", "Neural Networks")
NNFrequency$Year <- as.numeric(levels(NNFrequency$Year))[NNFrequency$Year]

MLFrequency <- as.data.frame(table(filteredML$Year))
names(MLFrequency) <- c("Year", "Machine Learning")
MLFrequency$Year <- as.numeric(levels(MLFrequency$Year))[MLFrequency$Year]

## ----CAP394_books_mergedtables-------------------------------------------
# Full outer join of all six tables on Year: a year present in any table is
# kept, and keywords with no books that year get NA.
all <- Reduce(
  function(dtf1, dtf2) merge(dtf1, dtf2, by = "Year", all = TRUE),
  list(
    DSFrequency, DMFrequency, BDFrequency,
    AIFrequency, NNFrequency, MLFrequency
  )
)
head(all)

## ----CAP394_books_fixedtables1-------------------------------------------
# A missing count means no matching books that year.
all[is.na(all)] <- 0
str(all)

## ----CAP394_books_fixedtables2-------------------------------------------
# Keep only the years 1980 through 2017.
all <- all[all$Year >= 1980 & all$Year < 2018, ]

## ----CAP394_books_plot, fig.width=12,fig.height=6------------------------
# Reshape to long form: one row per (Year, Keyword) pair.
# The table is converted to a data.table first: handing a plain data.frame to
# data.table's melt() depends on a deprecated redirect to the reshape2
# package, which this script never loads. Coercing every column to numeric
# gives the count columns one common type (the NA replacement above can leave
# a mix of integer and double columns).
counts <- as.data.table(lapply(all, as.numeric))
melted <- as.data.frame(
  melt(
    counts,
    id.vars = "Year",
    variable.name = "Keyword",
    value.name = "Count"
  )
)

# Draw the "Data Science" series thicker than the others.
melted$thickness <- 1
melted$thickness[melted$Keyword == "Data Science"] <- 3
head(melted)

# NOTE(review): ggplot2 >= 3.4.0 prefers the `linewidth` aesthetic for line
# thickness; `size` is kept here so the script still runs on older versions.
ggplot(
  melted,
  aes(x = Year, y = Count, colour = Keyword, group = Keyword, size = thickness)
) +
  geom_line() +
  scale_size(range = c(1, 3), guide = "none") +
  scale_x_continuous("Year", breaks = seq(1980, 2017, 2)) +
  guides(colour = guide_legend(override.aes = list(size = 3))) +
  ggtitle("Books") +
  theme(
    axis.title = element_text(size = 14),
    axis.text.x = element_text(size = 11, angle = -90, vjust = 0.5, hjust = 1),
    axis.text.y = element_text(size = 12),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    plot.title = element_text(size = 22)
  )