new day
This commit is contained in:
parent
299898bdff
commit
54831103d4
230
Day 2.R
230
Day 2.R
@ -1,83 +1,5 @@
|
||||
library(tidyverse)
|
||||
|
||||
#
|
||||
# fertilityplots
|
||||
#
|
||||
|
||||
fertilityData <- structure(list(Country = structure(c(2L, 5L, 6L,
|
||||
9L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L), .Label = c("Abkhazia",
|
||||
"Afghanistan", "Akrotiri and Dhekelia", "Åland", "Albania",
|
||||
"Algeria", "American Samoa", "Andorra", "Angola", "Anguilla",
|
||||
"Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Australia",
|
||||
"Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh",
|
||||
"Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bermuda",
|
||||
"Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil",
|
||||
"British Virgin Islands", "Brunei", "Bulgaria", "Burkina Faso",
|
||||
"Burundi", "Cambodia", "Cameroon", "Canada", "Cape Verde", "Cayman Islands",
|
||||
"Central African Republic", "Chad", "Channel Islands", "Chile",
|
||||
"China", "Christmas Island", "Cocos Island", "Colombia", "Comoros",
|
||||
"Congo, Dem. Rep.", "Congo, Rep.", "Cook Is", "Costa Rica", "Cote d'Ivoire",
|
||||
"Croatia", "Cuba", "Cyprus", "Czech Republic", "Czechoslovakia",
|
||||
"Denmark", "Djibouti", "Dominica", "Dominican Republic", "East Germany",
|
||||
"Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea",
|
||||
"Eritrea and Ethiopia", "Estonia", "Ethiopia", "Faeroe Islands",
|
||||
"Falkland Is (Malvinas)", "Fiji", "Finland", "France", "French Guiana",
|
||||
"French Polynesia", "Gabon", "Gambia", "Georgia", "Germany",
|
||||
"Ghana", "Gibraltar", "Greece", "Greenland", "Grenada", "Guadeloupe",
|
||||
"Guam", "Guatemala", "Guernsey", "Guinea", "Guinea-Bissau", "Guyana",
|
||||
"Haiti", "Holy See", "Honduras", "Hong Kong, China", "Hungary",
|
||||
"Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Isle of Man",
|
||||
"Israel", "Italy", "Jamaica", "Japan", "Jersey", "Jordan", "Kazakhstan",
|
||||
"Kenya", "Kiribati", "Kosovo", "Kuwait", "Kyrgyz Republic", "Lao",
|
||||
"Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein",
|
||||
"Lithuania", "Luxembourg", "Macao, China", "Macedonia, FYR",
|
||||
"Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta",
|
||||
"Marshall Islands", "Martinique", "Mauritania", "Mauritius",
|
||||
"Mayotte", "Mexico", "Micronesia, Fed. Sts.", "Moldova", "Monaco",
|
||||
"Mongolia", "Montenegro", "Montserrat", "Morocco", "Mozambique",
|
||||
"Myanmar", "Namibia", "Nauru", "Nepal", "Netherlands", "Netherlands Antilles",
|
||||
"New Caledonia", "New Zealand", "Ngorno-Karabakh", "Nicaragua",
|
||||
"Niger", "Nigeria", "Niue", "Norfolk Island", "North Korea",
|
||||
"North Yemen (former)", "Northern Cyprus", "Northern Mariana Islands",
|
||||
"Norway", "Oman", "Pakistan", "Palau", "Panama", "Papua New Guinea",
|
||||
"Paraguay", "Peru", "Philippines", "Pitcairn", "Poland", "Portugal",
|
||||
"Puerto Rico", "Qatar", "Reunion", "Romania", "Russia", "Rwanda",
|
||||
"Samoa", "San Marino", "Sao Tome and Principe", "Saudi Arabia",
|
||||
"Senegal", "Serbia", "Serbia and Montenegro", "Serbia excluding Kosovo",
|
||||
"Seychelles", "Sierra Leone", "Singapore", "Slovak Republic",
|
||||
"Slovenia", "Solomon Islands", "Somalia", "Somaliland", "South Africa",
|
||||
"South Korea", "South Ossetia", "South Yemen (former)", "Spain",
|
||||
"Sri Lanka", "St. Barthélemy", "St. Helena", "St. Kitts and Nevis",
|
||||
"St. Lucia", "St. Martin", "St. Vincent and the Grenadines",
|
||||
"St.-Pierre-et-Miquelon", "Sudan", "Suriname", "Svalbard", "Swaziland",
|
||||
"Sweden", "Switzerland", "Syria", "Taiwan", "Tajikistan", "Tanzania",
|
||||
"Thailand", "Timor-Leste", "Togo", "Tokelau", "Tonga", "Transnistria",
|
||||
"Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Turks and Caicos Islands",
|
||||
"Tuvalu", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom",
|
||||
"United Korea (former)\n", "United States", "Uruguay", "USSR",
|
||||
"Uzbekistan", "Vanuatu", "Venezuela", "Vietnam", "Virgin Islands (U.S.)",
|
||||
"Wallis et Futuna", "West Bank and Gaza", "West Germany", "Western Sahara",
|
||||
"Yemen", "Yugoslavia", "Zambia", "Zimbabwe"), class = "factor"),
|
||||
`1800` = c(7, 4.6, 6.99, 6.93, 5, 6.8, 7.8, 5.64, 6.5, 5.1,
|
||||
8.1, 5.9), `1801` = c(7, 4.6, 6.99, 6.93, 5, 6.8, 7.8, 5.64,
|
||||
6.48, 5.1, 8.1, 5.9), `1802` = c(7, 4.6, 6.99, 6.93, 4.99,
|
||||
6.8, 7.81, 5.64, 6.46, 5.1, 8.1, 5.9), `1803` = c(7, 4.6,
|
||||
6.99, 6.93, 4.99, 6.8, 7.81, 5.64, 6.44, 5.1, 8.1, 5.9),
|
||||
`1804` = c(7, 4.6, 6.99, 6.93, 4.99, 6.8, 7.81, 5.64, 6.42,
|
||||
5.1, 8.1, 5.9), `1805` = c(7, 4.6, 6.99, 6.93, 4.98, 6.8,
|
||||
7.82, 5.64, 6.4, 5.1, 8.1, 5.9)), .Names = c("Country",
|
||||
"1800", "1801", "1802", "1803", "1804", "1805"), row.names = c(NA,
|
||||
-12L), class = c("tbl_df", "tbl", "data.frame"))
|
||||
|
||||
(fertilityTidy <- fertilityData %>%
|
||||
tidyr::pivot_longer(cols = -Country, names_to = "Year",
|
||||
values_to = "Fertility") %>%
|
||||
mutate(Year = as.integer(Year)))
|
||||
|
||||
ggplot(fertilityTidy, aes(Year, Fertility, color = Country)) +
|
||||
geom_point() +
|
||||
geom_line()
|
||||
|
||||
#
|
||||
#
|
||||
|
||||
@ -95,5 +17,157 @@ iris |> # Data set
|
||||
scale_fill_viridis(discrete = T) # Auto scales colors for color blind friendliness
|
||||
|
||||
|
||||
df <- dplyr::filter(gapminder::gapminder, year == 1992)
|
||||
|
||||
head(df)
|
||||
|
||||
library(hrbrthemes)
|
||||
|
||||
df |> ggplot(aes(gdpPercap,lifeExp,size=pop,color=continent)) +
|
||||
geom_point(alpha=0.5) +
|
||||
scale_size(range = c(1, 20)) +
|
||||
scale_x_log10()+
|
||||
# theme_ipsum() +
|
||||
scale_fill_viridis(discrete=TRUE, guide = "none", option="A") +
|
||||
theme(aspect.ratio = 0.8, legend.key.width = unit(3, "line"))+ # Setting plot ratio
|
||||
labs(title = "Gapminder for 1992",
|
||||
x = "Gross Domestic Product (log scale)",
|
||||
y = "Life Expectancy at birth (years)",
|
||||
color = "Continent", size = "Population")
|
||||
|
||||
library(gganimate)
|
||||
|
||||
ggplot(gapminder::gapminder,aes(gdpPercap,lifeExp,size=pop,color=continent)) +
|
||||
geom_point(alpha=0.5) +
|
||||
scale_size(range = c(1, 20)) +
|
||||
scale_x_log10()+
|
||||
# theme_ipsum() +
|
||||
scale_fill_viridis(discrete=TRUE, guide = "none", option="A") +
|
||||
theme(aspect.ratio = 0.8, legend.key.width = unit(3, "line"))+
|
||||
labs(title = 'Year: {frame_time}', x = 'GDP per capita', y = 'life expectancy') +
|
||||
transition_time(year) +
|
||||
ease_aes('linear') # Does not combine the output photos
|
||||
|
||||
|
||||
ggplot(data = mpg) +
|
||||
geom_point(mapping = aes(x = displ, y = hwy, color = displ), color = "blue")
|
||||
|
||||
sapply(mpg,is.factor)
|
||||
|
||||
head(mpg)
|
||||
?mpg
|
||||
|
||||
ggplot(data = mpg, aes(x = displ, y = hwy, color = displ, size = , shape = )) +
|
||||
geom_point()
|
||||
|
||||
|
||||
df <- data.frame(abc = 1, xyz = "a")
|
||||
df$x # Partial matching
|
||||
df[, "xyz"] # Giving vector
|
||||
df[, c("abc", "xyz")] # Giving data frame
|
||||
|
||||
df <- tibble(abc = 1, xyz = "a")
|
||||
df$x
|
||||
df[, "xyz"] # Keeping data frame
|
||||
df[, c("abc", "xyz")]
|
||||
|
||||
var<-"mpg"
|
||||
head(mtcars)
|
||||
|
||||
mtcars[var]
|
||||
|
||||
df <- tibble("1" = 1:4, "2" = 3:6)
|
||||
df[["1"]]
|
||||
plot(df$`1`,df$`2`)
|
||||
df$`3` <- (df$`1`/df$`2`)
|
||||
|
||||
|
||||
### dplyr
|
||||
|
||||
library(nycflights13)
|
||||
library(tidyverse)
|
||||
|
||||
flights
|
||||
|
||||
?flights
|
||||
|
||||
flights |>
|
||||
filter(month == 1, day == 1)
|
||||
|
||||
flights |>
|
||||
filter(arr_delay > 120)
|
||||
|
||||
flights |>
|
||||
filter(dest %in% c("IAH", "HOU"))
|
||||
|
||||
print(airlines)
|
||||
|
||||
flights |>
|
||||
filter(carrier %in% c("UA", "AA", "DL"))
|
||||
|
||||
flights |>
|
||||
filter(month %in% c(7:9))
|
||||
|
||||
flights |>
|
||||
filter(arr_delay > 120 & dep_delay==0)
|
||||
|
||||
flights |>
|
||||
filter(dep_time<=600)
|
||||
#
|
||||
# https://r4ds.had.co.nz/transform.html
|
||||
#
|
||||
flights |> arrange(!is.na(dep_time))
|
||||
|
||||
flights |> arrange(desc(dep_delay),dep_time)
|
||||
|
||||
flights |> mutate(speed=distance/air_time) |> arrange(desc(speed)) |> print(width=Inf)
|
||||
|
||||
flights |> arrange(desc(distance))
|
||||
|
||||
flights |> arrange(distance)
|
||||
|
||||
# Selections
|
||||
|
||||
flights |> select(all_of(c("dep_time", "dep_delay", "arr_time", "arr_delay")))
|
||||
flights |> select(dep_time, dep_delay, arr_time, arr_delay)
|
||||
flights |> select(dep_time, dep_delay, arr_time, arr_delay)
|
||||
|
||||
flights |> select(match(c("dep_time", "dep_delay", "arr_time", "arr_delay"),names(flights)))
|
||||
|
||||
flights |> select(any_of(vars))
|
||||
|
||||
vars <- c("year", "month", "day", "dep_delay", "arr_delay")
|
||||
|
||||
select(flights, contains("TIME", ignore.case = TRUE))
|
||||
|
||||
# Mutate
|
||||
library(lubridate)
|
||||
|
||||
flights |> mutate(
|
||||
dep_time_n =
|
||||
)
|
||||
|
||||
min_trans <- function(x){
|
||||
ifelse(x== 2400, 0, x %/% 100*60 + x %% 100)
|
||||
}
|
||||
|
||||
flights |> transmute(across(c(dep_time,sched_dep_time),min_trans))
|
||||
|
||||
flights |> transmute(
|
||||
air_time = air_time,
|
||||
air_time_c = arr_time-dep_time # Differences due to time zones
|
||||
) |>
|
||||
slice_sample(n = 5e3) |>
|
||||
ggplot()+
|
||||
geom_point(aes(air_time,air_time_c))+
|
||||
geom_abline(color="red")
|
||||
|
||||
|
||||
flights |>
|
||||
transmute(
|
||||
sched_dep_time, dep_delay, dep_time
|
||||
)
|
||||
|
||||
1:3+1:10
|
||||
|
||||
|
||||
|
142
Day 3.R
Normal file
142
Day 3.R
Normal file
@ -0,0 +1,142 @@
|
||||
library(tidyverse)
|
||||
library(nycflights13)
|
||||
|
||||
# 5.6
|
||||
|
||||
delays <- flights %>%
|
||||
group_by(dest) %>%
|
||||
summarise(
|
||||
count = n(),
|
||||
dist = mean(distance, na.rm = TRUE),
|
||||
delay = mean(arr_delay, na.rm = TRUE)
|
||||
) %>%
|
||||
filter(count > 20, dest != "HNL")
|
||||
|
||||
ggplot(data = delays, mapping = aes(x = dist, y = delay)) +
|
||||
geom_point(aes(size = count), alpha = 1/3) +
|
||||
geom_smooth(se = FALSE)
|
||||
|
||||
library(microbenchmark)
|
||||
microbenchmark::microbenchmark(
|
||||
flights |>
|
||||
rowwise() |> # loops rowwise, instead of vectorised.
|
||||
mutate(arr_time2 = arr_time + 1) |>
|
||||
ungroup() # Reverses the rowwise
|
||||
|
||||
flights |>
|
||||
mutate(arr_time2 = arr_time + 1)
|
||||
)
|
||||
|
||||
# 5.6.1 Exercises
|
||||
|
||||
flights |>
|
||||
group_by(flight) |>
|
||||
summarise(med = median(arr_delay, na.rm=TRUE)) |>
|
||||
filter(med==-15)
|
||||
|
||||
flights |>
|
||||
group_by(flight) |>
|
||||
summarise(med = median(arr_delay, na.rm=TRUE)) |>
|
||||
filter(med==15)
|
||||
|
||||
flights |>
|
||||
filter(arr_delay==10) |>
|
||||
group_by(flight) |>
|
||||
summarise(n=n()) |>
|
||||
filter(n >= 10) |>
|
||||
arrange(desc(n))
|
||||
|
||||
|
||||
flights |>
|
||||
group_by(flight) |>
|
||||
summarise(early = mean(arr_delay >= 0, na.rm = T),
|
||||
n = n())
|
||||
|
||||
not_cancelled <- flights %>%
|
||||
filter(!is.na(dep_delay), !is.na(arr_delay))
|
||||
|
||||
not_cancelled %>% count(dest)
|
||||
|
||||
flights |>
|
||||
filter(!is.na(dep_delay), !is.na(arr_delay)) |>
|
||||
group_by(dest) |>
|
||||
summarise(n= n())
|
||||
|
||||
flights |>
|
||||
filter(!is.na(dep_delay), !is.na(arr_delay)) |>
|
||||
group_by(tailnum) |>
|
||||
summarise(n = sum(distance))
|
||||
|
||||
not_cancelled %>% count(tailnum, wt = distance)
|
||||
|
||||
flights |>
|
||||
(\(.) filter(., complete.cases(.))) ()
|
||||
|
||||
flights |>
|
||||
group_by(year, month, day) |>
|
||||
summarise(canc = sum(is.na(dep_delay)),
|
||||
n = n()) |>
|
||||
arrange(desc(canc))
|
||||
|
||||
|
||||
flights |>
|
||||
group_by(year, month, day) |>
|
||||
summarise(canc = mean(is.na(dep_delay)), # calculates the proportion of NAs per day
|
||||
del = mean(dep_delay, na.rm=TRUE)) |>
|
||||
ggplot(aes(canc,del)) +
|
||||
geom_point() +
|
||||
geom_smooth() +
|
||||
theme_bw(18)
|
||||
|
||||
# 5.7.1 Exercises
|
||||
|
||||
flights |>
|
||||
group_by(tailnum) |>
|
||||
filter(sum(!is.na(arr_delay))>1) |>
|
||||
summarise(m_del = max(arr_delay, na.rm = TRUE)) |>
|
||||
slice_max(m_del, n = 1)
|
||||
|
||||
flights |>
|
||||
group_by(hour) |>
|
||||
summarise(mean_del=mean(arr_delay , na.rm = TRUE)) |>
|
||||
ggplot(aes(hour,mean_del)) +
|
||||
geom_point() +
|
||||
geom_smooth()
|
||||
|
||||
flights %>%
|
||||
group_by(dest) %>%
|
||||
mutate(prop_delay = arr_delay / sum(arr_delay, na.rm = TRUE)) %>%
|
||||
relocate(prop_delay)
|
||||
|
||||
|
||||
# Relational data
|
||||
|
||||
airports |>
|
||||
right_join(flights |>
|
||||
group_by(dest) |>
|
||||
summarise(avg_del = mean(arr_delay,na.rm=TRUE)),
|
||||
c("faa" = "dest")) |>
|
||||
ggplot(aes(lon, lat, size = avg_del, color = avg_del)) +
|
||||
borders("state") +
|
||||
geom_point(alpha=.6) +
|
||||
coord_quickmap()+
|
||||
theme_bw(18)+
|
||||
scale_color_viridis_c(direction = -1)
|
||||
|
||||
library(lubridate)
|
||||
flights |>
|
||||
group_by(tailnum) |>
|
||||
summarise(avg_del = mean(arr_delay,na.rm=TRUE)) |>
|
||||
left_join(planes |> select(tailnum, year),
|
||||
c("tailnum")) |>
|
||||
(\(.) filter(.,complete.cases(.))) () |>
|
||||
ggplot(aes(2014-year, avg_del, color = avg_del)) +
|
||||
geom_point(alpha=.6)+
|
||||
theme_bw(18)+
|
||||
scale_color_viridis_c(direction = -1) +
|
||||
geom_smooth() +
|
||||
theme(aspect.ratio = 0.8, legend.key.width = unit(3, "line"))+ # Setting plot ratio
|
||||
labs(y="Mean delay", x="Age")
|
||||
|
||||
|
||||
weather
|
Loading…
x
Reference in New Issue
Block a user