8-R-DataCamp-Introduction-to-Writing-Functions-in-R
8-R-DataCamp-Introduction-to-Writing-Functions-in-R
1. How to Write A Function
1.1 Why you should use functions (video)
1.2 Calling functions
Instruction 1:
# Look at the gold medals data
gold_medals
# Note the arguments to median()
args(median)
# Rewrite this function call, following best practices
median(gold_medals, na.rm = TRUE)
Instruction 2:
# Note the arguments to rank()
args(rank)
# Rewrite this function call, following best practices
rank(-gold_medals, na.last = "keep", ties.method = "min")
1.3 The benefits of writing functions
1.4 Converting scripts into functions (video)
1.5 Converting a script to a function
1.6 Your first functions: tossing a coin
Instruction 1:
coin_sides <- c("head", "tail")
# Sample from coin_sides once
sample(coin_sides, size = 1)
Instruction 2:
# Write a template for your function, toss_coin()
toss_coin <- function() {
# (Leave the contents of the body for later)
# Add punctuation to finish the body
}
Instruction 3:
# Your script, from a previous step
coin_sides <- c("head", "tail")
sample(coin_sides, 1)
# Paste your script into the function body
toss_coin <- function() {
coin_sides <- c("head", "tail")
sample(coin_sides, 1)
}
Instruction 4:
# Your functions, from previous steps
toss_coin <- function() {
coin_sides <- c("head", "tail")
sample(coin_sides, 1)
}
# Call your function
toss_coin()
1.7 Inputs to functions
Instruction 1:
coin_sides <- c("head", "tail")
n_flips <- 10
# Sample from coin_sides n_flips times with replacement
sample(coin_sides, size= n_flips, replace = TRUE )
Instruction 2:
# Update the function to return n_flips coin tosses
toss_coin <- function(n_flips) {
coin_sides <- c("head", "tail")
sample(coin_sides, n_flips, replace = TRUE)
}
# Generate 10 coin tosses
toss_coin(10)
1.8 Multiple inputs to functions
Instruction 1:
coin_sides <- c("head", "tail")
n_flips <- 10
p_head <- 0.8
# Define a vector of weights
weights <- c(p_head, 1-p_head)
# Update so that heads are sampled with prob p_head
sample(coin_sides, n_flips, replace = TRUE, prob = weights)
Instruction 2:
# Update the function so heads have probability p_head
toss_coin <- function(n_flips, p_head) {
coin_sides <- c("head", "tail")
# Define a vector of weights
weights <- c(p_head, 1 - p_head)
# Modify the sampling to be weighted
sample(coin_sides, n_flips, replace = TRUE, prob = weights)
}
# Generate 10 coin tosses
toss_coin(10, p_head = 0.8)
1.9 Y kant I reed ur code? (video)
1.10 Data or detail?
1.11 Renaming GLM
Instruction 1:
# Run a generalized linear regression
glm(
# Model no. of visits vs. gender, income, travel
n_visits ~ gender + income + travel,
# Use the snake_river_visits dataset
data = snake_river_visits,
# Make it a Poisson regression
family = poisson
)
Instruction 2:
# Write a function to run a Poisson regression
run_poisson_regression <- function(data, formula) {
glm(formula, data, family = poisson)
}
Instruction 3:
# From previous step
run_poisson_regression <- function(data, formula) {
glm(formula, data, family = poisson)
}
# Re-run the Poisson regression, using your function
model <- snake_river_visits %>%
run_poisson_regression(n_visits ~ gender + income + travel)
# Run this to see the predictions
snake_river_explanatory %>%
mutate(predicted_n_visits = predict(model, ., type = "response"))%>%
arrange(desc(predicted_n_visits))
2. All about Arguments
2.1 Default arguments (video)
2.2 Numeric defaults
Instruction:
# Set the default for n to 5
cut_by_quantile <- function(x, n = 5, na.rm, labels, interval_type) {
probs <- seq(0, 1, length.out = n + 1)
qtiles <- quantile(x, probs, na.rm = na.rm, names = FALSE)
right <- switch(interval_type, "(lo, hi]" = TRUE, "[lo, hi)" = FALSE)
cut(x, qtiles, labels = labels, right = right, include.lowest = TRUE)
}
# Remove the n argument from the call
cut_by_quantile(
n_visits,
na.rm = FALSE,
labels = c("very low", "low", "medium", "high", "very high"),
interval_type = "(lo, hi]"
)
2.3 Logical defaults
Instruction:
# Set the default for na.rm to FALSE
cut_by_quantile <- function(x, n = 5, na.rm = FALSE, labels, interval_type) {
probs <- seq(0, 1, length.out = n + 1)
qtiles <- quantile(x, probs, na.rm = na.rm, names = FALSE)
right <- switch(interval_type, "(lo, hi]" = TRUE, "[lo, hi)" = FALSE)
cut(x, qtiles, labels = labels, right = right, include.lowest = TRUE)
}
# Remove the na.rm argument from the call
cut_by_quantile(
n_visits,
labels = c("very low", "low", "medium", "high", "very high"),
interval_type = "(lo, hi]"
)
2.4 NULL defaults
Instruction:
# Set the default for labels to NULL
cut_by_quantile <- function(x, n = 5, na.rm = FALSE, labels = NULL, interval_type) {
probs <- seq(0, 1, length.out = n + 1)
qtiles <- quantile(x, probs, na.rm = na.rm, names = FALSE)
right <- switch(interval_type, "(lo, hi]" = TRUE, "[lo, hi)" = FALSE)
cut(x, qtiles, labels = labels, right = right, include.lowest = TRUE)
}
# Remove the labels argument from the call
cut_by_quantile(
n_visits,
interval_type = "(lo, hi]"
)
2.5 Categorical defaults
Instruction 1:
# Set the categories for interval_type to "(lo, hi]" and "[lo, hi)"
cut_by_quantile <- function(x, n = 5, na.rm = FALSE, labels = NULL,
interval_type = c("(lo, hi]", "[lo, hi)")) {
# Match the interval_type argument
interval_type <- match.arg(interval_type)
probs <- seq(0, 1, length.out = n + 1)
qtiles <- quantile(x, probs, na.rm = na.rm, names = FALSE)
right <- switch(interval_type, "(lo, hi]" = TRUE, "[lo, hi)" = FALSE)
cut(x, qtiles, labels = labels, right = right, include.lowest = TRUE)
}
# Remove the interval_type argument from the call
cut_by_quantile(n_visits)
2.6 Passing arguments between functions (video)
2.7 Harmonic mean
Instruction 1:
# Look at the Standard and Poor 500 data
glimpse(std_and_poor500)
# Write a function to calculate the reciprocal
get_reciprocal <- function(x) {
1 / x
}
Instruction 2:
# From previous step
get_reciprocal <- function(x) {
1 / x
}
# Write a function to calculate the harmonic mean
calc_harmonic_mean <- function(x,na.rm = FALSE){
x %>%
get_reciprocal() %>%
mean(na.rm = na.rm) %>%
get_reciprocal()
}
Instruction 3:
# From previous steps
get_reciprocal <- function(x) {
1 / x
}
calc_harmonic_mean <- function(x) {
x %>%
get_reciprocal() %>%
mean() %>%
get_reciprocal()
}
std_and_poor500 %>%
# Group by sector
group_by(sector) %>%
# Summarize, calculating harmonic mean of P/E ratio
summarise(hmean_pe_ratio = calc_harmonic_mean(pe_ratio))
2.8 Dealing with missing values
Instruction 1:
# Add an na.rm arg with a default, and pass it to mean()
calc_harmonic_mean <- function(x,na.rm = FALSE) {
x %>%
get_reciprocal() %>%
mean(na.rm = na.rm) %>%
get_reciprocal()
}
Instruction 2:
# From previous step
calc_harmonic_mean <- function(x, na.rm = FALSE) {
x %>%
get_reciprocal() %>%
mean(na.rm = na.rm) %>%
get_reciprocal()
}
std_and_poor500 %>%
# Group by sector
group_by(sector) %>%
# Summarize, calculating harmonic mean of P/E ratio
summarize(hmean_pe_ratio = calc_harmonic_mean(pe_ratio,na.rm = TRUE))
2.9 Passing arguments with
Instruction 1:
# Swap na.rm arg for ... in signature and body
calc_harmonic_mean <- function(x, ...) {
x %>%
get_reciprocal() %>%
mean(...) %>%
get_reciprocal()
}
Instruction 2:
calc_harmonic_mean <- function(x, ...) {
x %>%
get_reciprocal() %>%
mean(...) %>%
get_reciprocal()
}
std_and_poor500 %>%
# Group by sector
group_by(sector) %>%
# Summarize, calculating harmonic mean of P/E ratio
summarize(hmean_pe_ratio = calc_harmonic_mean(pe_ratio,na.rm = TRUE))
2.10 Checking arguments (video)
2.11 Throwing errors with bad arguments
Instruction:
# Assert that x is numeric
assert_is_numeric(x)
x %>%
get_reciprocal() %>%
mean(na.rm = na.rm) %>%
get_reciprocal()
}
# See what happens when you pass it strings
calc_harmonic_mean(std_and_poor500$sector)
2.12 Custom error logic
Instruction:
calc_harmonic_mean <- function(x, na.rm = FALSE) {
assert_is_numeric(x)
# Check if any values of x are non-positive
if(any(is_non_positive(x), na.rm = TRUE)) {
# Throw an error
stop("x contains non-positive values, so the harmonic mean makes no sense.")
}
x %>%
get_reciprocal() %>%
mean(na.rm = na.rm) %>%
get_reciprocal()
}
# See what happens when you pass it negative numbers
calc_harmonic_mean(std_and_poor500$pe_ratio - 20)
2.13 Fixing function arguments
Instruction:
# Update the function definition to fix the na.rm argument
calc_harmonic_mean <- function(x, na.rm = FALSE) {
assert_is_numeric(x)
if(any(is_non_positive(x), na.rm = TRUE)) {
stop("x contains non-positive values, so the harmonic mean makes no sense.")
}
# Use the first value of na.rm, and coerce to logical
na.rm <- coerce_to(use_first(na.rm), target_class = "logical")
x %>%
get_reciprocal() %>%
mean(na.rm = na.rm) %>%
get_reciprocal()
}
# See what happens when you pass it malformed na.rm
calc_harmonic_mean(std_and_poor500$pe_ratio, na.rm = 1:5)
3. Return Values and Scope
3.1 Returning values from functions (video)
3.2 Returning early
Instruction:
is_leap_year <- function(year) {
# If year is div. by 400 return TRUE
if(is_divisible_by(year, 400)) {
return(TRUE)
}
# If year is div. by 100 return FALSE
if(is_divisible_by(year, 100)) {
return(FALSE)
}
# If year is div. by 4 return TRUE
if(is_divisible_by(year, 4)) {
return(TRUE)
}
# Otherwise return FALSE
return(FALSE)
}
3.3 Returning invisibly
Instruction 1:
# Using cars, draw a scatter plot of dist vs. speed
plt_dist_vs_speed <- plot(dist ~ speed, data = cars)
# Oh no! The plot object is NULL
plt_dist_vs_speed
Instruction 2:
# Define a pipeable plot fn with data and formula args
pipeable_plot <- function(data,formula) {
# Call plot() with the formula interface
plot(formula,data)
# Invisibly return the input dataset
invisible(data)
}
# Draw the scatter plot of dist vs. speed again
plt_dist_vs_speed <- cars %>%
pipeable_plot(dist ~ speed)
# Now the plot object has a value
plt_dist_vs_speed
3.4 Returning multiple values from functions (video)
3.5 Returning many things
Instruction 1:
# Look at the structure of model (it's a mess!)
str(model)
# Use broom tools to get a list of 3 data frames
list(
# Get model-level values
model = glance(model),
# Get coefficient-level values
coefficients = tidy(model),
# Get observation-level values
observations = augment(model)
)
Instruction 2:
# Wrap this code into a function, groom_model
groom_model <- function(model) {
list(
model = glance(model),
coefficients = tidy(model),
observations = augment(model)
)
}
Instruction 3:
# From previous step
groom_model <- function(model) {
list(
model = glance(model),
coefficients = tidy(model),
observations = augment(model)
)
}
# Call groom_model on model, assigning to 3 variables
c(mdl, cff, obs) %<-% groom_model(model)
# See these individual variables
mdl; cff; obs
3.6 Returning metadata
Instruction:
pipeable_plot <- function(data, formula) {
plot(formula, data)
# Add a "formula" attribute to data
attr(data, "formula") <- formula
invisible(data)
}
# From previous exercise
plt_dist_vs_speed <- cars %>%
pipeable_plot(dist ~ speed)
# Examine the structure of the result
str(plt_dist_vs_speed)
3.7 Environments (video)
3.8 Creating and exploring environments
Instruction 1:
# Add capitals, national_parks, & population to a named list
rsa_lst <- list(
capitals = capitals,
national_parks = national_parks,
population = population
)
# List the structure of each element of rsa_lst
ls.str(rsa_lst)
Instruction 2:
# From previous step
rsa_lst <- list(
capitals = capitals,
national_parks = national_parks,
population = population
)
# Convert the list to an environment
rsa_env <- list2env(rsa_lst)
# List the structure of each variable
ls.str(rsa_env)
Instruction 3:
# From previous steps
rsa_lst <- list(
capitals = capitals,
national_parks = national_parks,
population = population
)
rsa_env <- list2env(rsa_lst)
# Find the parent environment of rsa_env
parent <- parent.env(rsa_env)
# Print its name
environmentName(parent)
3.9 Do variables exist?
Instruction:
# Compare the contents of the global environment and rsa_env
ls.str(globalenv())
ls.str(rsa_env)
# Does population exist in rsa_env?
exists("population", envir = rsa_env)
# Does population exist in rsa_env, ignoring inheritance?
exists("population", envir = rsa_env, inherits = FALSE)
3.10 Scope and precedence (video)
3.11 Can a function find its variables?
3.12 Can you access variables from inside functions?
3.13 Variable precedence 1
3.14 Variable precedence 2
4. Case Study on Gain Yields
4.1 Grain yields and unit conversion
4.2 Converting areas to metric 1
Instruction 1:
# Write a function to convert acres to sq. yards
acres_to_sq_yards <- function(acres) {
acres * 4840
}
Instruction 2:
# Write a function to convert yards to meters
yards_to_meters<- function(yards){
yards * 36 * 0.0254
}
Instruction 3:
# Write a function to convert sq. meters to hectares
sq_meters_to_hectares <- function(sq_meters){
sq_meters / 10000
}
4.3 Converting areas to metric 2
Instruction 1:
# Write a function to convert sq. yards to sq. meters
sq_yards_to_sq_meters <- function(sq_yards) {
sq_yards %>%
# Take the square root
sqrt() %>%
# Convert yards to meters
yards_to_meters() %>%
# Square it
raise_to_power(2)
}
Instruction 2:
# Load the function from the previous step
load_step2()
# Write a function to convert acres to hectares
acres_to_hectares <- function(acres) {
acres %>%
# Convert acres to sq yards
acres_to_sq_yards() %>%
# Convert sq yards to sq meters
sq_yards_to_sq_meters() %>%
# Convert sq meters to hectares
sq_meters_to_hectares
}
Instruction 3:
# Load the functions from the previous steps
load_step3()
# Define a harmonic acres to hectares function
harmonic_acres_to_hectares <- function(acres) {
acres %>%
# Get the reciprocal
get_reciprocal() %>%
# Convert acres to hectares
acres_to_hectares() %>%
# Get the reciprocal again
get_reciprocal()
}
4.4 Converting yields to metric
Instruction 1:
# Write a function to convert lb to kg
lbs_to_kgs <- function(masses){
masses * 0.45359237
}
Instruction 2:
# Write a function to convert bushels to lbs
bushels_to_lbs <- function(bushels, crop) {
# Define a lookup table of scale factors
c(barley = 48, corn = 56, wheat = 60) %>%
# Extract the value for the crop
extract(crop) %>%
# Multiply by the no. of bushels
multiply_by(bushels)
}
Instruction 3:
# Load fns defined in previous steps
load_step3()
# Write a function to convert bushels to kg
bushels_to_kgs <- function(bushels, crop) {
bushels %>%
# Convert bushels to lbs for this crop
bushels_to_lbs(crop) %>%
# Convert lbs to kgs
lbs_to_kgs()
}
Instruction 4:
# Load fns defined in previous steps
load_step4()
# Write a function to convert bushels/acre to kg/ha
bushels_per_acre_to_kgs_per_hectare <- function(bushels_per_acre, crop = c("barley", "corn", "wheat")) {
# Match the crop argument
crop <- match.arg(crop)
bushels_per_acre %>%
# Convert bushels to kgs for this crop
bushels_to_kgs(crop) %>%
# Convert harmonic acres to ha
harmonic_acres_to_hectares()
}
4.5 Applying the unit conversion
Instruction 1:
# View the corn dataset
glimpse(corn)
corn %>%
# Add some columns
mutate(
# Convert farmed area from acres to ha
farmed_area_ha = acres_to_hectares(farmed_area_acres),
# Convert yield from bushels/acre to kg/ha
yield_kg_per_ha = bushels_per_acre_to_kgs_per_hectare(
yield_bushels_per_acre,
crop = "corn"
)
)
Instruction 2:
# Wrap this code into a function
fortify_with_metric_units <- function(data, crop) {
data %>%
mutate(
farmed_area_ha = acres_to_hectares(farmed_area_acres),
yield_kg_per_ha = bushels_per_acre_to_kgs_per_hectare(
yield_bushels_per_acre,
crop = crop
)
)
}
# Try it on the wheat dataset
fortify_with_metric_units(wheat, crop = "wheat")
4.6 Visualizing grain yields (video)
4.7 Plotting yields over time
Instruction 1:
# Using corn, plot yield (kg/ha) vs. year
ggplot(corn, aes(year, yield_kg_per_ha)) +
# Add a line layer, grouped by state
geom_line(aes(group = state)) +
# Add a smooth trend layer
geom_smooth()
Instruction 2:
# Wrap this plotting code into a function
plot_yield_vs_year <- function(data){
ggplot(data, aes(year, yield_kg_per_ha)) +
geom_line(aes(group = state)) +
geom_smooth()
}
# Test it on the wheat dataset
plot_yield_vs_year(wheat)
4.8 A nation divided
Instruction 1:
# Inner join the corn dataset to usa_census_regions by state
corn %>%
inner_join(usa_census_regions, by = "state")
Instruction 2:
# Wrap this code into a function
fortify_with_census_region <- function(data) {
data %>%
inner_join(usa_census_regions, by = "state")
}
# Try it on the wheat dataset
fortify_with_census_region(wheat)
4.9 Plotting yields over time by region
Instruction 1:
# Plot yield vs. year for the corn dataset
plot_yield_vs_year(corn) +
# Facet, wrapped by census region
facet_wrap(vars(census_region))
Instruction 2:
# Wrap this code into a function
plot_yield_vs_year_by_region <- function(data) {
plot_yield_vs_year(data) +
facet_wrap(vars(census_region))
}
# Try it on the wheat dataset
plot_yield_vs_year_by_region(wheat)
4.10 Modeling grain yields (video)
4.11 Running a model
Instruction 1:
# Run a generalized additive model of
# yield vs. smoothed year and census region
gam(yield_kg_per_ha ~ s(year) + census_region, data = corn)
Instruction 2:
# Wrap the model code into a function
run_gam_yield_vs_year_by_region <- function(data) {
gam(yield_kg_per_ha ~ s(year) + census_region, data = data)
}
# Try it on the wheat dataset
run_gam_yield_vs_year_by_region(wheat)
4.12 Making yield predictions
Instruction 1:
# Make predictions in 2050
predict_this <- data.frame(
year = 2050,
census_region = census_regions
)
# Predict the yield
pred_yield_kg_per_ha <- predict(corn_model, predict_this, type = "response")
predict_this %>%
# Add the prediction as a column of predict_this
mutate(pred_yield_kg_per_ha = pred_yield_kg_per_ha)
Instruction 2:
# Wrap this prediction code into a function
predict_yields <- function(model,year) {
predict_this <- data.frame(
year = year,
census_region = census_regions
)
pred_yield_kg_per_ha <- predict(model, predict_this, type = "response")
predict_this %>%
mutate(pred_yield_kg_per_ha = pred_yield_kg_per_ha)
}
# Try it on the wheat dataset
predict_yields(wheat_model,2050)
4.13 Do it all over
Instruction 1:
fortified_barley <- barley %>%
# Fortify with metric units
fortify_with_metric_units() %>%
# Fortify with census regions
fortify_with_census_region()
# See the result
glimpse(fortified_barley)
Instruction 2:
# From previous step
fortified_barley <- barley %>%
fortify_with_metric_units() %>%
fortify_with_census_region()
# Plot yield vs. year by region
plot_yield_vs_year_by_region(fortified_barley)
Instruction 3:
# From previous step
fortified_barley <- barley %>%
fortify_with_metric_units() %>%
fortify_with_census_region()
fortified_barley %>%
# Run a GAM of yield vs. year by region
run_gam_yield_vs_year_by_region() %>%
# Make predictions of yields in 2050
predict_yields(2050)
4.14 Congratulations
上一篇: 输入10个数,求出最大元素是第几个数(数组作为函数參数)
下一篇: 微机原理 接口部分编程题复习