Water Quality at Sydney Beaches
Ranking of Sydney beaches based on *Enterococci* concentration
By Manish Datt
TidyTuesday data for 2025-05-20
library(tidyverse)
library(ggtext)
# Download the TidyTuesday (2025-05-20) water-quality measurements and
# print the resulting tibble.
water_quality <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-05-20/water_quality.csv"
)
water_quality
# A tibble: 123,530 × 10
region council swim_site date time enterococci_cfu_100ml
1 Western Sydney Hawkesbury C… Windsor … 2025-04-28 11:00 620
2 Sydney Harbour North Sydney… Hayes St… 2025-04-28 11:40 64
3 Sydney Harbour Willoughby C… Northbri… 2025-04-28 10:54 160
4 Sydney Harbour Northern Bea… Fairligh… 2025-04-28 09:28 54
5 Western Sydney Hawkesbury C… Yarramun… 2025-04-28 10:35 720
6 Sydney Harbour Northern Bea… Little M… 2025-04-28 09:19 230
7 Sydney Harbour City of Cana… Chiswick… 2025-04-28 13:06 120
8 Sydney Harbour Inner West C… Dawn Fra… 2025-04-28 08:04 280
9 Sydney Harbour Woollahra Mu… Rose Bay… 2025-04-28 08:50 60
10 Sydney Harbour Woollahra Mu… Camp Cove 2025-04-28 09:09 100
# ℹ 123,520 more rows
# ℹ 4 more variables: water_temperature_c <dbl>, conductivity_ms_cm <dbl>,
#   latitude <dbl>, longitude <dbl>
# Column-wise overview of the raw data (one line per variable).
water_quality %>%
  glimpse()
Rows: 123,530
Columns: 10
$ region
$ council
$ swim_site
$ date
$ time
Data Wrangling
# Yearly mean Enterococci concentration per swim site, reduced to the five
# sites with the lowest mean in each year.
#
# Rows without an Enterococci reading are dropped first, so every mean is
# computed from observed values only. `date_y` is the sampling year as a
# character string (e.g. "2025").
#
# `.groups = "drop_last"` is spelled out on purpose: after summarising, the
# data must stay grouped by `date_y` so that `slice_min()` picks the five
# lowest means *within each year* rather than across the whole table. The
# result is therefore still grouped by `date_y`.
# Note: `slice_min()` keeps ties by default, so a year can contribute more
# than five rows if several sites share the fifth-lowest mean.
wq_grp <- water_quality %>%
  filter(!is.na(enterococci_cfu_100ml)) %>%
  mutate(date_y = format(date, "%Y")) %>%
  group_by(date_y, swim_site) %>%
  summarise(
    mean_enterococci = mean(enterococci_cfu_100ml, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  slice_min(order_by = mean_enterococci, n = 5)
summarise() has grouped output by 'date_y'. You can override using the
.groups argument.
# Number of rows each swim site has in the yearly top-five table, most
# frequent first. Grouping is removed so counts span all years.
site_freq <- count(ungroup(wq_grp), swim_site, sort = TRUE)
Plotting
# Heatmap of the yearly top-five swim sites: one tile per (year, site),
# shaded by the site's mean Enterococci concentration for that year.
# Sites are ordered on the y axis by the order of `site_freq$swim_site`
# (reversed so the first entry is drawn at the top).
wq_grp %>%
  mutate(
    swim_site = fct_rev(factor(swim_site, levels = site_freq$swim_site))
  ) %>%
  ggplot(aes(x = date_y, y = swim_site, fill = mean_enterococci)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "lightblue", high = "blue") +
  labs(
    title = "Year-wise top five swim-sites based on the lowest *Enterococci* concentration",
    # NULL (not a theme element) is how labs() removes an axis title.
    x = NULL,
    y = NULL,
    fill = "Average *Enterococci* <br> CFU/100ml",
    caption = "Colony Forming Units (CFU) indicate number of viable bacteria that can form colonies when grown in a lab."
  ) +
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 1),
    # ggtext elements so the markdown (*italics*, <br>) in the title,
    # legend title and caption is rendered instead of shown literally.
    plot.title = element_textbox_simple(
      padding = margin(5.5, 5.5, 5.5, 5.5),
      margin = margin(5, 0, 10, 0)
    ),
    legend.title = element_markdown(hjust = 0.5),
    # Place the legend inside the panel, near the bottom right.
    legend.position = "inside",
    legend.position.inside = c(0.80, 0.1),
    legend.title.position = "top",
    plot.caption = element_textbox_simple(
      margin = margin(0, 0, 10, 175),
      padding = margin(5.5, 0, 0, 0),
      hjust = 0,
      size = 8,
      color = "gray40",
      lineheight = 1.2
    )
    # The x-axis title is not drawn, so no font family is set for it; the
    # previous "Consolas" setting only produced a missing-font warning.
  ) +
  guides(fill = guide_colorbar(direction = "horizontal", barheight = .5)) +
  # Square tiles: one x unit is as long as one y unit.
  coord_fixed(ratio = 1)
not found in Windows font database
#ggsave("swim_sites2.png", width = 8, height = 6, dpi = 300, bg = "white")