library(ggplot2)
# Title-slide example: scatter plot of the built-in `swiss` data set.
# The data is piped into ggplot(); the layers are then combined with `+`.
swiss |>
  ggplot() +
  aes(x = Education,
      y = Examination) +
  geom_point() +
  scale_colour_brewer()
ggplot, part 1
Rworkshop
Thursday, 8 February 2024
Learning objectives
ggplot2
data.frame/tibble
ggplot2
Graphs are split into layers
Data
A | B | C | D |
---|---|---|---|
2 | 3 | 4 | a |
1 | 2 | 1 | a |
4 | 5 | 15 | b |
9 | 10 | 80 | b |
Aesthetics function
x = A
y = C
shape = D
Scaling to physical units
\(x = \frac{A - \min(A)}{\operatorname{range}(A)} \times \mathrm{width}\)
\(y = \frac{C - \min(C)}{\operatorname{range}(C)} \times \mathrm{height}\)
\(\mathrm{shape} = f_{s}(D)\)
Geometry
Scatter plot
Data drawn as points
Mapped data
x | y | shape |
---|---|---|
25 | 11 | circle |
0 | 0 | circle |
75 | 53 | square |
200 | 300 | square |
What if we want to split into panels circles and squares?
Redundancy
shape
aesthetic is free for another variable.
Warning
ggplot2
layers are combined with +
!
The magrittr pipe %>%
or the base pipe |>
will not work!
install.packages("palmerpenguins")
Horst AM, Hill AP, Gorman KB (2020). palmerpenguins: Palmer Archipelago (Antarctica) penguin data. R package v0.1.0
# A tibble: 344 × 8
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
<fct> <fct> <dbl> <dbl> <int> <int>
1 Adelie Torgersen 39.1 18.7 181 3750
2 Adelie Torgersen 39.5 17.4 186 3800
3 Adelie Torgersen 40.3 18 195 3250
4 Adelie Torgersen NA NA NA NA
5 Adelie Torgersen 36.7 19.3 193 3450
6 Adelie Torgersen 39.3 20.6 190 3650
7 Adelie Torgersen 38.9 17.8 181 3625
8 Adelie Torgersen 39.2 19.6 195 4675
9 Adelie Torgersen 34.1 18.1 193 3475
10 Adelie Torgersen 42 20.2 190 4250
# ℹ 334 more rows
# ℹ 2 more variables: sex <fct>, year <int>
# Step 1: flipper length against body mass, one point per penguin,
# coloured by sex, with custom colours and full labelling.
ggplot(data = penguins) +
  aes(x = flipper_length_mm, y = body_mass_g, color = sex) +
  geom_point() +
  # manual two-colour palette; na.translate = FALSE removes missing
  # values from the discrete colour scale
  scale_color_manual(
    values = c("darkorange", "cyan4"),
    na.translate = FALSE
  ) +
  labs(
    title = "Penguin flipper and body mass",
    subtitle = "Dimensions for male/female Adelie, Chinstrap and Gentoo Penguins at Palmer Station LTER",
    caption = "Horst AM, Hill AP, Gorman KB (2020)",
    x = "Flipper length (mm)",
    y = "Body mass (g)",
    color = "Penguin sex"
  )
# Step 2: same scatter plot, now with explicit text sizes for the
# subtitle and the axis titles.
ggplot(data = penguins) +
  aes(x = flipper_length_mm, y = body_mass_g, color = sex) +
  geom_point() +
  # manual two-colour palette; na.translate = FALSE removes missing
  # values from the discrete colour scale
  scale_color_manual(
    values = c("darkorange", "cyan4"),
    na.translate = FALSE
  ) +
  labs(
    title = "Penguin flipper and body mass",
    subtitle = "Dimensions for male/female Adelie, Chinstrap and Gentoo Penguins at Palmer Station LTER",
    caption = "Horst AM, Hill AP, Gorman KB (2020)",
    x = "Flipper length (mm)",
    y = "Body mass (g)",
    color = "Penguin sex"
  ) +
  # text sizes
  theme(
    axis.title = element_text(size = 11),
    plot.subtitle = element_text(size = 13)
  )
# Step 3: same scatter plot, with the legend moved below the panel
# and given a white, border-less background.
ggplot(data = penguins) +
  aes(x = flipper_length_mm, y = body_mass_g, color = sex) +
  geom_point() +
  # manual two-colour palette; na.translate = FALSE removes missing
  # values from the discrete colour scale
  scale_color_manual(
    values = c("darkorange", "cyan4"),
    na.translate = FALSE
  ) +
  labs(
    title = "Penguin flipper and body mass",
    subtitle = "Dimensions for male/female Adelie, Chinstrap and Gentoo Penguins at Palmer Station LTER",
    caption = "Horst AM, Hill AP, Gorman KB (2020)",
    x = "Flipper length (mm)",
    y = "Body mass (g)",
    color = "Penguin sex"
  ) +
  # text sizes
  theme(
    axis.title = element_text(size = 11),
    plot.subtitle = element_text(size = 13)
  ) +
  # legend placement and background
  theme(
    legend.background = element_rect(fill = "white", color = NA),
    legend.position = "bottom"
  )
# Step 4: same scatter plot, with a left-aligned italic caption and
# one panel per species.
ggplot(data = penguins) +
  aes(
    x = flipper_length_mm,
    y = body_mass_g,
    color = sex
  ) +
  geom_point() +
  # manual two-colour palette; na.translate = FALSE removes missing
  # values from the discrete colour scale
  scale_color_manual(
    values = c("darkorange", "cyan4"),
    na.translate = FALSE
  ) +
  labs(
    title = "Penguin flipper and body mass",
    subtitle = "Dimensions for male/female Adelie, Chinstrap and Gentoo Penguins at Palmer Station LTER",
    caption = "Horst AM, Hill AP, Gorman KB (2020)",
    x = "Flipper length (mm)",
    y = "Body mass (g)",
    color = "Penguin sex"
  ) +
  # text sizes
  theme(
    axis.title = element_text(size = 11),
    plot.subtitle = element_text(size = 13)
  ) +
  # legend placement and background
  theme(
    legend.background = element_rect(fill = "white", color = NA),
    legend.position = "bottom"
  ) +
  # caption aligned relative to the whole plot, in italics
  theme(
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 0, face = "italic")
  ) +
  # split into panels, one per species
  facet_wrap(vars(species))
# Step 5 (final): same faceted scatter plot, with dodged x-axis labels
# and comma-formatted y-axis labels.
ggplot(data = penguins) +
  aes(
    x = flipper_length_mm,
    y = body_mass_g,
    color = sex
  ) +
  geom_point() +
  # manual two-colour palette; na.translate = FALSE removes missing
  # values from the discrete colour scale
  scale_color_manual(
    values = c("darkorange", "cyan4"),
    na.translate = FALSE
  ) +
  labs(
    title = "Penguin flipper and body mass",
    subtitle = "Dimensions for male/female Adelie, Chinstrap and Gentoo Penguins at Palmer Station LTER",
    caption = "Horst AM, Hill AP, Gorman KB (2020)",
    x = "Flipper length (mm)",
    y = "Body mass (g)",
    color = "Penguin sex"
  ) +
  # text sizes
  theme(
    axis.title = element_text(size = 11),
    plot.subtitle = element_text(size = 13)
  ) +
  # legend placement and background
  theme(
    legend.background = element_rect(fill = "white", color = NA),
    legend.position = "bottom"
  ) +
  # caption aligned relative to the whole plot, in italics
  theme(
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 0, face = "italic")
  ) +
  # split into panels, one per species
  facet_wrap(vars(species)) +
  # x labels spread over two rows
  scale_x_continuous(guide = guide_axis(n.dodge = 2)) +
  # y labels formatted by scales::label_comma()
  scale_y_continuous(labels = scales::label_comma())
geom_point()
geom_line()
geom_bar()
geom_violin()
geom_histogram()
geom_density()
They are present; it works because they have sensible defaults:
theme_grey
cartesian
identity
disabled
Requirements
aes()
map data columns/variables to aesthetics
geom
) have different expectations:
colour
mapping
Mapped parameters require two conditions:
aes()
Error in FUN(X[[i]], ...): object 'country' not found
string
has a different effect:
aes()
aes()
and refer to a data column
Compare the code and the results
Note
aesthetics
in ggplot()
are passed on to all geometries.
aesthetics
in geom_*()
are specific (and can overwrite inherited)
Simpson’s paradox
Statistical correlation depending on stratification.
palmerpenguins
package if you haven’t yet
penguins
data set and plot bill_length_mm
, bill_depth_mm
and species
.island
to the aesthetics shape
.5
alpha = 0.7
)
Suppose we want to connect dots by colors
Warning
Should be the job of geom_line()
# Bill length against bill depth with a linear fit.
# colour = species and shape = island are set in ggplot(), so both
# geom_point() and geom_smooth() inherit them.
ggplot(penguins,
       aes(x = bill_length_mm,
           y = bill_depth_mm,
           shape = island,
           colour = species)) +
  geom_point() +
  # linear model fit; the formula is given explicitly
  geom_smooth(method = "lm",
              formula = 'y ~ x') +
  labs(title = "Bill ratios of Palmer penguins",
       caption = "Horst AM, Hill AP, Gorman KB (2020)",
       subtitle = "Split per species / island",
       shape = "Islands",
       # fixed spelling: "culmen" (was "cumen")
       x = "culmen length (mm)",
       y = "culmen depth (mm)")
ggplot2
doing the stat for you
geom_col()
after_stat()
(formerly stat()) function allows computation, like proportions
See list in help pages
# Horizontal bar chart of the proportion of each species.
ggplot(penguins, aes(y = species)) +
  # after_stat() replaces the deprecated stat(): `count` is the variable
  # computed by the bar stat, so the whole proportion is evaluated on
  # the computed data
  geom_bar(aes(x = after_stat(count / sum(count)))) +
  # show the x axis as percentages
  scale_x_continuous(labels = scales::label_percent())
x
scale in %
using scales
forcats
)
fct_infreq()
library(forcats)
# Horizontal bar chart of species counts, with the species factor
# reordered by frequency via forcats::fct_infreq().
penguins |>
  ggplot(aes(y = fct_infreq(species))) + #<<
  geom_bar() +
  scale_x_continuous(expand = c(0, NA)) +
  labs(
    y = NULL,
    title = "Palmer penguins species"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # better to hide the horizontal grid lines
    panel.grid.major.y = element_blank(),
    # nice trick from T. Pedersen: draw the panel (grid lines) over the bars
    panel.ontop = TRUE
  )
y
by a categorical x
# Body mass per species and sex: transparent violins with jittered,
# dodged points drawn on top.
penguins |>
  # keep only rows where sex is known
  filter(!is.na(sex)) |>
  # aesthetics defined in ggplot() are shared by both geometries
  ggplot(aes(
    x = species,
    y = body_mass_g,
    # colour is used for the violin contours and the dots
    colour = sex,
    fill = sex
  )) +
  # very transparent filling
  geom_violin(trim = FALSE, alpha = 0.1) +
  geom_point(
    alpha = 0.5,
    position = position_jitterdodge(dodge.width = 0.9),
    # dots are not needed in the legend
    show.legend = FALSE
  )
ggbeeswarm
library(ggdist)
# Bill depth / length ratio per species, shown three ways on the same
# rows: a violin outline, a half-eye density nudged upwards, and the
# raw observations as quasirandom points.
ggplot(penguins,
       aes(y = species,
           x = bill_depth_mm / bill_length_mm,
           color = species, fill = species)) +
  # `linewidth` replaces `size` for line widths (ggplot2 >= 3.4.0);
  # the fixed white fill overrides the inherited fill = species
  geom_violin(width = .5, fill = "white", alpha = 0.4,
              linewidth = 1.1, trim = FALSE) +
  ggdist::stat_halfeye(
    adjust = .33, width = .67,
    alpha = 0.6, trim = FALSE,
    # shift the density above the violin
    position = position_nudge(y = .35)) +
  # NOTE(review): `groupOnX` is reported as deprecated in recent
  # ggbeeswarm releases; confirm against the installed version
  ggbeeswarm::geom_quasirandom(groupOnX = FALSE,
                               alpha = .5, size = 3,
                               width = 0.25) +
  scale_color_brewer(palette = "Set1", type = "qual") +
  scale_fill_brewer(palette = "Set1", type = "qual") +
  labs(x = "Bill ratio", y = NULL) +
  # the y axis already names the species, so the legend is dropped
  theme(legend.position = "none",
        axis.line = element_blank(),
        panel.grid.major.x = element_line(colour = "grey90"),
        axis.ticks = element_blank())
(Hint: think about inherited aesthetics)
# Exercise: this snippet fails on purpose (see the error printed below).
# ggplot() is called without any aes(); the x/y mapping is given only
# inside geom_point(), so it applies to that layer alone and
# geom_smooth() is left without x and y.
penguins |>
ggplot() +
geom_point(aes(x = bill_length_mm,
y = body_mass_g)) +
geom_smooth(method = "lm")
Error in `geom_smooth()`:
! Problem while computing stat.
ℹ Error occurred in the 2nd layer.
Caused by error in `compute_layer()`:
! `stat_smooth()` requires the following missing aesthetics: x and y
ggplot2
outputs dots as they appear in the input data
You learned to:
Acknowledgments
*
pagedown
), Christophe Dervieux
Further reading