June 22, 2025
filter(), select(), mutate()
The vdem object from vdemdata contains the entire V-Dem dataset. Try running this code chunk. What do you see in democracy?
02:00
filter()
Try == instead of >=
Try <= and see what happens
02:00
= versus ==
= is used to assign values to variables, just like <-
== is used to test if two values are equal to each other
filter(year == 1990) will give you just the observations for 1990
>= and <=
>= is used to test if a value is greater than or equal to another value
<= is used to test if a value is less than or equal to another value
filter(year >= 1990) will give you the observations for 1990 and later
filter(year <= 1990) will give you the observations for 1990 and earlier
select()
Try v2x_libdem instead of v2x_polyarchy
02:00
mutate()
polyarchy
polyarchy squared?
democracy <- vdem |> # download the V-Dem dataset
filter(year == 2015) |> # keep only observations from 2015 (== tests equality)
select( # select (and rename) these variables
country = country_name, # the name before the = sign is the new name
vdem_ctry_id = country_id, # the name after the = sign is the old name
year, # listed without an = sign, so it keeps its existing name
polyarchy = v2x_polyarchy
) |>
mutate(
polyarchy_dbl = polyarchy * 2 # create a new variable equal to 2 x polyarchy
)
glimpse(democracy)
02:00
+ addition
- subtraction
* multiplication
/ division
^ exponentiation (also **)
vdemdata Example
# Load packages
library(vdemdata) # to download V-Dem data
library(dplyr) # provides the data-wrangling verbs used below
# Download the data
democracy <- vdem |> # download the V-Dem dataset
filter(year == 2015) |> # filter year, keep 2015
select( # select (and rename) these variables
country = country_name, # the name before the = sign is the new name
vdem_ctry_id = country_id, # the name after the = sign is the old name
year,
polyarchy = v2x_polyarchy,
gdp_pc = e_gdppc,
region = e_regionpol_6C
) |>
mutate(
region = case_match(region, # replace the numeric codes in region with region names
1 ~ "Eastern Europe",
2 ~ "Latin America",
3 ~ "Middle East",
4 ~ "Africa",
5 ~ "The West",
6 ~ "Asia")
)
# View the data
glimpse(democracy)
Use filter() to select years…
# Download the data
democracy <- vdem |>
filter(year == 2015) |> # keep only the rows where year equals 2015
select(
country = country_name,
vdem_ctry_id = country_id,
year,
polyarchy = v2x_polyarchy,
gdp_pc = e_gdppc,
region = e_regionpol_6C
) |>
mutate(
region = case_match(region,
1 ~ "Eastern Europe",
2 ~ "Latin America",
3 ~ "Middle East",
4 ~ "Africa",
5 ~ "The West",
6 ~ "Asia")
)
Use select() to choose variables…
# Download the data
democracy <- vdem |>
filter(year == 2015) |>
select( # select (and rename) these variables
country = country_name, # the name before the = sign is the new name
vdem_ctry_id = country_id, # the name after the = sign is the old name
year, # listed without an = sign, so it keeps its existing name
polyarchy = v2x_polyarchy,
gdp_pc = e_gdppc,
region = e_regionpol_6C
) |>
mutate(
region = case_match(region,
1 ~ "Eastern Europe",
2 ~ "Latin America",
3 ~ "Middle East",
4 ~ "Africa",
5 ~ "The West",
6 ~ "Asia")
)
Use mutate() with case_match() to Recode Region…
# Download the data
democracy <- vdem |>
filter(year == 2015) |>
select(
country = country_name,
vdem_ctry_id = country_id,
year,
polyarchy = v2x_polyarchy,
gdp_pc = e_gdppc,
region = e_regionpol_6C
) |>
mutate(
region = case_match(region, # replace the numeric codes in region with region names
1 ~ "Eastern Europe",
2 ~ "Latin America",
3 ~ "Middle East",
4 ~ "Africa",
5 ~ "The West",
6 ~ "Asia")
# the number on the left of the ~ is the V-Dem region code
# we are changing that number to the region name on the right
# of the ~
)
Have a look at the V-Dem codebook (see e_regionpol_7C)
group_by() summarize() arrange()
group_by(), summarize(), arrange()
# group_by(), summarize() and arrange()
democracy |> # start from democracy; the result is printed, not saved to a new object
group_by(region) |> # group data by region
# NOTE(review): libdem and gender are not created by the select() calls shown
# earlier in this section - confirm democracy contains them at this point
summarize( # summarize following vars (by region)
polyarchy_mean = mean(polyarchy, na.rm = TRUE), # calculate mean after removing NAs
libdem_median = median(libdem, na.rm = TRUE), # calculate median after removing NAs
gender = sd(gender, na.rm = TRUE), # calculate std. dev after removing NAs (output keeps the name gender)
gdp_pc = min(gdp_pc, na.rm = TRUE) # calculate minimum gdp_pc after removing NAs (output keeps the name gdp_pc)
) |>
arrange(desc(polyarchy_mean)) # arrange in descending order by polyarchy score
Use across() to Apply Same Function to Multiple Columns
Now try grouping by country instead of region and filter for years >= 2000.
polyarchy for Sweden?
libdem for New Zealand?
gender for Norway?
gdp_pc for Germany?
Try using across() to calculate the mean of polyarchy, libdem, gender, and gdp_pc for each country.