October 1, 2024
filter()
select()
mutate()
The vdem
function from vdemdata
just downloads all of the data. Try running this code chunk. What do you see in democracy
?
02:00
filter()
==
instead of >=
<=
and see what happens
02:00
=
versus ==
=
is used to assign values to variables, just like <-
==
is used to test if two values are equal to each other
filter(year == 1990)
will give you just the observations for 1990
>=
and <=
>=
is used to test if a value is greater than or equal to another value
<=
is used to test if a value is less than or equal to another value
filter(year >= 1990)
will give you the observations for 1990 and later
filter(year <= 1990)
will give you the observations for 1990 and earlier
select()
v2x_libdem
instead of v2x_polyarchy
02:00
mutate()
polyarchy
polyarchy
squared?
# Build a 2015 snapshot of V-Dem and add a doubled polyarchy score
democracy <- vdem |> # start from the V-Dem dataset
  filter(year == 2015) |> # keep only observations from 2015
  select( # select (and rename) these variables
    country = country_name, # name before the = sign is the new name
    vdem_ctry_id = country_id, # name after the = sign is the old name
    year,
    polyarchy = v2x_polyarchy
  ) |>
  mutate(
    polyarchy_dbl = polyarchy * 2 # create a variable equal to 2x polyarchy
  )

glimpse(democracy) # compact overview of the resulting columns
02:00
+
addition
-
subtraction
*
multiplication
/
division
^
exponentiation (also **
)
vdemdata
Example
# Load packages
library(vdemdata) # to download V-Dem data
library(dplyr)
# Download the data
democracy <- vdem |> # start from the V-Dem dataset
  filter(year == 2015) |> # filter year, keep 2015
  select( # select (and rename) these variables
    country = country_name, # the name before the = sign is the new name
    vdem_ctry_id = country_id, # the name after the = sign is the old name
    year,
    polyarchy = v2x_polyarchy,
    gdp_pc = e_gdppc,
    region = e_regionpol_6C
  ) |>
  mutate(
    # replace the numeric codes in region with region names
    region = case_match(
      region,
      1 ~ "Eastern Europe",
      2 ~ "Latin America",
      3 ~ "Middle East",
      4 ~ "Africa",
      5 ~ "The West",
      6 ~ "Asia"
    )
  )

# View the data
glimpse(democracy)
Use filter()
to select years…
# Build the 2015 dataset; this slide focuses on the filter() step
democracy <- vdem |>
  filter(year == 2015) |> # keep only rows where year equals 2015
  select(
    country = country_name,
    vdem_ctry_id = country_id,
    year,
    polyarchy = v2x_polyarchy,
    gdp_pc = e_gdppc,
    region = e_regionpol_6C
  ) |>
  mutate(
    region = case_match(
      region,
      1 ~ "Eastern Europe",
      2 ~ "Latin America",
      3 ~ "Middle East",
      4 ~ "Africa",
      5 ~ "The West",
      6 ~ "Asia"
    )
  )
Use select()
to choose variables…
# Build the 2015 dataset; this slide focuses on the select() step
democracy <- vdem |>
  filter(year == 2015) |>
  select( # keep only the listed columns, renaming as we go
    country = country_name, # new name goes on the left of the = sign
    vdem_ctry_id = country_id, # existing name goes on the right
    year, # kept under its current name
    polyarchy = v2x_polyarchy,
    gdp_pc = e_gdppc,
    region = e_regionpol_6C
  ) |>
  mutate(
    region = case_match(
      region,
      1 ~ "Eastern Europe",
      2 ~ "Latin America",
      3 ~ "Middle East",
      4 ~ "Africa",
      5 ~ "The West",
      6 ~ "Asia"
    )
  )
Use mutate
with case_match()
to Recode Region….
# Download the data
democracy <- vdem |>
  filter(year == 2015) |>
  select(
    country = country_name,
    vdem_ctry_id = country_id,
    year,
    polyarchy = v2x_polyarchy,
    gdp_pc = e_gdppc,
    region = e_regionpol_6C
  ) |>
  mutate(
    # replace the numeric codes in region with region names:
    # the number on the left of the ~ is the V-Dem region code,
    # the text on the right of the ~ is the region name that replaces it
    region = case_match(
      region,
      1 ~ "Eastern Europe",
      2 ~ "Latin America",
      3 ~ "Middle East",
      4 ~ "Africa",
      5 ~ "The West",
      6 ~ "Asia"
    )
  )
Have a look at the V-Dem codebook
e_regionpol_7C
)
group_by()
summarize()
arrange()
group_by()
, summarize()
, arrange()
# group_by(), summarize() and arrange()
# The result is printed, not saved; assign it with <- to keep it.
# NOTE(review): this uses libdem and gender, so `democracy` must contain
# those columns -- confirm the data prep step keeps them.
democracy |>
  group_by(region) |> # group data by region
  summarize( # one row per region with the statistics below
    polyarchy_mean = mean(polyarchy, na.rm = TRUE), # mean, NAs removed
    libdem_median = median(libdem, na.rm = TRUE), # median, NAs removed
    gender_sd = sd(gender, na.rm = TRUE), # std. dev., NAs removed
    gdp_pc_min = min(gdp_pc, na.rm = TRUE) # minimum gdp_pc, NAs removed
  ) |>
  arrange(desc(polyarchy_mean)) # descending order by mean polyarchy score
Use across()
to Apply Same Function to Multiple Columns
Now try grouping by country instead of region and filter for years >= 2000.
polyarchy
for Sweden?
libdem
for New Zealand?
gender
for Norway?
gdp_pc
for Germany?
Try using across()
to calculate the mean of polyarchy
, libdem
, gender
and gdp_pc
for each country.