---
title: "Marathon"
author: "VIncent HELOIN"
date: "5/19/2020"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r Marathon}
library(data.table)
library(lubridate)
library(pacman)
p_load(dplyr, ggplot2, tidyr, stringr, lubridate, gridExtra, grid, ggpmisc)

# Collect every CSV in the working directory. `pattern` is a regular
# expression (not a shell glob), so the extension is anchored explicitly.
files <- list.files(pattern = "\\.csv$")
if (length(files) == 0L) {
  stop("No CSV files found in ", getwd(), call. = FALSE)
}

# Stack all files into one table; `fill = TRUE` pads columns that are
# missing from some of the files with NA.
Boston <- rbindlist(lapply(files, fread), fill = TRUE)

# Parse the finish time once into a lubridate Period, then store it as a
# plain numeric number of hours so that mean()/median() and ggplot scales
# treat it as an ordinary continuous variable.
# NOTE(review): assumes `Official Time` is read as "H:M:S" text - confirm.
Boston$`Official Time` <- hms(Boston$`Official Time`)
Boston$Duration_hr <- period_to_seconds(Boston$`Official Time`) / 3600
```

```{r, fig.width=12}
# Line chart
# Bar chart
# Histogram
# Pie chart
# Bubble

# Numeric decade bucket: 1 = 10-19, ..., 7 = 70-79. Ages outside 10-79
# (or missing) get NA.
Boston <- Boston %>%
  mutate(
    agegroup = case_when(
      Age >= 70 & Age <= 79 ~ 7,
      Age >= 60 & Age <= 69 ~ 6,
      Age >= 50 & Age <= 59 ~ 5,
      Age >= 40 & Age <= 49 ~ 4,
      Age >= 30 & Age <= 39 ~ 3,
      Age >= 20 & Age <= 29 ~ 2,
      Age >= 10 & Age <= 19 ~ 1
    )
  )

# Factor version of the decade bins, labelled "(0,10]", "(10,20]", ...
# NOTE(review): these intervals are closed on the right, so an age of
# exactly 20 lands in "(10,20]" here but in bucket 2 of `agegroup` above.
Boston$age_group <- cut(Boston$Age, seq(0, 100, 10))

# Polar chart: one slice per bucket, coloured by mean finish time.
Boston %>%
  group_by(agegroup) %>%
  summarise(average_Duration_hr = mean(Duration_hr, na.rm = TRUE)) %>%
  ggplot() +
  aes(x = "", y = agegroup, fill = average_Duration_hr) +
  geom_col(width = 1, color = "white") +
  coord_polar("y", start = 0)
```

```{r}
# `M/F` is awkward to reference without backticks; give it a syntactic name.
Boston <- Boston %>%
  rename(M_F = `M/F`)
```

```{r}
# Number of runners per decade bucket, largest first.
Boston %>%
  count(agegroup, sort = TRUE)
```

```{r}
# Pie chart of runner counts per age bin. position_stack(vjust = 0.5)
# centres each label inside its own slice, matching how the bars stack.
Boston %>%
  count(age_group) %>%
  ggplot() +
  aes(x = "", y = n, fill = age_group) +
  geom_col(width = 1, color = "white") +
  coord_polar("y", start = 0) +
  geom_text(
    aes(label = age_group),
    position = position_stack(vjust = 0.5),
    color = "white"
  )
```

```{r}
# Chart_Pie_By_Age

# Bin ages into labelled, left-closed intervals of width `by`.
#
# x          Numeric vector of ages; values are floored before binning.
# lower      Lower edge of the first bin.
# upper      Lower edge of the open-ended last bin.
# by         Bin width.
# sep        Separator between the two ends of a bin label.
# above.char Suffix of the open-ended last label.
#
# Returns a factor with levels such as "0-9", "10-19", ..., "80+".
age.cat <- function(x, lower = 0, upper, by = 10, sep = "-",
                    above.char = "+") {
  bin_starts <- seq(lower, upper - by, by = by)
  bin_ends <- seq(lower + by - 1, upper - 1, by = by)
  labs <- c(
    paste(bin_starts, bin_ends, sep = sep),
    paste0(upper, above.char)
  )
  cut(
    floor(x),
    breaks = c(seq(lower, upper, by = by), Inf),
    right = FALSE,
    labels = labs
  )
}

# Same pie chart, labelled with the counts; labels are centred in their
# slices via position_stack() rather than a hand-computed cumulative sum.
Boston %>%
  count(age_group) %>%
  ggplot() +
  aes(x = factor(1), y = n, fill = age_group) +
  geom_col() +
  geom_text(
    aes(label = n),
    position = position_stack(vjust = 0.5),
    size = 5
  ) +
  coord_polar(theta = "y")
```

```{r}
# Share of runners per decade bucket: slice size is the count `n`, and the
# bucket code is only used to colour the slices.
Boston %>%
  count(agegroup) %>%
  ggplot() +
  aes(x = "", y = n, fill = factor(agegroup)) +
  geom_col(width = 1, color = "white") +
  coord_polar("y", start = 0)
```

```{r}
# Mean finish time per decade bucket, drawn as a polar chart.
Boston %>%
  group_by(agegroup) %>%
  summarise(average_Duration_hr = mean(Duration_hr, na.rm = TRUE)) %>%
  ggplot() +
  aes(x = "", y = average_Duration_hr, fill = agegroup) +
  geom_col(width = 1, color = "white") +
  coord_polar("y", start = 0)
```

```{r}
# Switch to the readable "0-9" ... "80+" labels for the grouped bar chart.
Boston$age_group <- age.cat(Boston$Age, 0, 80)

# Mean finish time by age bin, with one bar per M_F value side by side.
Boston %>%
  group_by(age_group, M_F) %>%
  summarise(average_Duration_hr = mean(Duration_hr, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot() +
  aes(x = age_group, y = average_Duration_hr, fill = M_F) +
  geom_col(position = position_dodge())
```

```{r, fig.width=12}
# Linear trend of the median finish time against age. The smoother is
# selected with `method`; M_F is not available after summarising by Age,
# so no fill is mapped here.
Boston %>%
  group_by(Age) %>%
  summarise(Median_Duration_hr = median(Duration_hr, na.rm = TRUE)) %>%
  ggplot(aes(x = Age, y = Median_Duration_hr)) +
  geom_smooth(method = "lm")
```

```{r, fig.width=12}
# Number of runners per decade bucket, one line per M_F value.
Boston %>%
  count(agegroup, M_F) %>%
  ggplot() +
  geom_line(aes(x = agegroup, y = n, color = M_F))
```