DataViz.manishdatt.com

Water Quality at Sydney Beaches

Ranking of Sydney beaches based on *Enterococci* concentration

By Manish Datt

TidyTuesday data for 2025-05-20

library(tidyverse)
library(ggtext)
# Download the TidyTuesday (2025-05-20) water quality readings from GitHub.
water_quality <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-05-20/water_quality.csv"
)

# Auto-print the tibble for a quick look at the first rows.
water_quality

# A tibble: 123,530 × 10
region council swim_site date time enterococci_cfu_100ml

1 Western Sydney Hawkesbury C… Windsor … 2025-04-28 11:00 620
2 Sydney Harbour North Sydney… Hayes St… 2025-04-28 11:40 64
3 Sydney Harbour Willoughby C… Northbri… 2025-04-28 10:54 160
4 Sydney Harbour Northern Bea… Fairligh… 2025-04-28 09:28 54
5 Western Sydney Hawkesbury C… Yarramun… 2025-04-28 10:35 720
6 Sydney Harbour Northern Bea… Little M… 2025-04-28 09:19 230
7 Sydney Harbour City of Cana… Chiswick… 2025-04-28 13:06 120
8 Sydney Harbour Inner West C… Dawn Fra… 2025-04-28 08:04 280
9 Sydney Harbour Woollahra Mu… Rose Bay… 2025-04-28 08:50 60
10 Sydney Harbour Woollahra Mu… Camp Cove 2025-04-28 09:09 100
# ℹ 123,520 more rows
# ℹ 4 more variables: water_temperature_c , conductivity_ms_cm ,
# latitude , longitude

# Column-wise overview: one line per column with its leading values.
water_quality %>%
  glimpse()

Rows: 123,530
Columns: 10
$ region "Western Sydney", "Sydney Harbour", "Sydney Harb…
$ council "Hawkesbury City Council", "North Sydney Council…
$ swim_site "Windsor Beach", "Hayes Street Beach", "Northbri…
$ date 2025-04-28, 2025-04-28, 2025-04-28, 2025-04-28,…
$ time

Data Wrangling

# Mean Enterococci concentration per swim site and calendar year, keeping for
# each year the five sites with the lowest mean. slice_min() keeps ties by
# default (with_ties = TRUE), so a year can contribute more than five rows.
wq_grp <- water_quality %>%
  # Drop readings without an Enterococci measurement; this makes na.rm
  # unnecessary in the mean() below.
  filter(!is.na(enterococci_cfu_100ml)) %>%
  # Year as a character label, used both for grouping and as the x axis.
  mutate(date_y = format(date, "%Y")) %>%
  group_by(date_y, swim_site) %>%
  summarise(
    mean_enterococci = mean(enterococci_cfu_100ml),
    # Explicitly keep the result grouped by year so slice_min() ranks sites
    # within each year (and the "grouped output" message is not emitted).
    .groups = "drop_last"
  ) %>%
  slice_min(order_by = mean_enterococci, n = 5)

summarise() has grouped output by 'date_y'. You can override using the
.groups argument.

# Number of years each swim site appears in the yearly top five, most
# frequent first. Grouping is removed first so sites are counted across years.
site_freq <- count(ungroup(wq_grp), swim_site, sort = TRUE)

Plotting

# Heatmap of the yearly top-five swim sites (lowest mean Enterococci).
# Site levels follow site_freq (sorted by descending count) and are then
# reversed, so the site that appears most often ends up at the top of the
# y axis.
wq_grp %>%
  mutate(
    swim_site = fct_rev(factor(swim_site, levels = site_freq$swim_site))
  ) %>%
  ggplot(aes(x = date_y, y = swim_site, fill = mean_enterococci)) +
  scale_fill_gradient(low = "lightblue", high = "blue") +
  geom_tile(color = "white") +
  labs(
    title = "Year-wise top five swim-sites based on the lowest *Enterococci* concentration",
    # NULL removes an axis label; element_blank() is a theme element and does
    # not belong in labs().
    x = NULL,
    y = NULL,
    fill = "Average *Enterococci* <br> CFU/100ml",
    caption = "Colony Forming Units (CFU) indicate number of viable bacteria that can form colonies when grown in a lab."
  ) +
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 1),
    # ggtext text boxes render the markdown italics and wrap long text.
    plot.title = element_textbox_simple(
      padding = margin(5.5, 5.5, 5.5, 5.5),
      margin = margin(5, 0, 10, 0)
    ),
    legend.title = element_markdown(hjust = 0.5),
    # Place the legend inside the panel, towards the lower right.
    legend.position = "inside",
    legend.position.inside = c(0.80, 0.1),
    legend.title.position = "top",
    plot.caption = element_textbox_simple(
      margin = margin(0, 0, 10, 175),
      padding = margin(5.5, 0, 0, 0),
      hjust = 0,
      size = 8,
      color = "gray40",
      lineheight = 1.2
    )
    # No font family is set for the x-axis title: the title is removed above,
    # and requesting "Consolas" only triggers a font-database warning on
    # systems where that font is not registered.
  ) +
  guides(fill = guide_colorbar(direction = "horizontal", barheight = 0.5)) +
  # Equal unit length on both axes gives square tiles.
  coord_fixed(ratio = 1)

not found in Windows font database

#ggsave("swim_sites2.png", width = 8, height = 6, dpi = 300, bg = "white")