Data Visualization Guidelines

R is an amazing tool for data analysis and visualization. The code on this page shows a variety of plots that use the R ggplot2 package to display statistical outputs. All the data shown on the page is fictitious and only meant for illustrative purposes.

Use of Fonts

  • Century Gothic Bold, font size 12 should be used for the labels and values on the graph. - Download here

General Rules

  • All axes should be labelled and where possible, the values for data points should be indicated on the graph.
  • Prevent use of grid lines whenever possible to not clutter plots
  • Use line breaks in long labels
  • Be conscious about the way axes are ordered. For example, order regions either from the lowest to the highest value or using the serpentine order.
  • Label axes appropriately

Use of Colours

Please use the following colour schemes for different types of disaggregation.

Sex

Male



Hex: #206095
rgb(32, 96, 149)
.

Female



Hex: #F66068
rgb(246, 96,104)
.


Locality Type

National



#27A0CC
.

Urban



#871A5B
.

Rural



#22D0B6
.



Economic Sectors

Industry



#14607A
.

Agriculture



#07BB9E
.

Service



#F98B00
.



Neutral

Neutral is used for general variables. It is advised that the number of variables per plot should not exceed 5.


#002060

#0070C0

#00B0F0

#8EA9DB

#9BC2E6

#FFFFCC



Positive ~ Negative


#6AA84F

#93C47D

#FFFFCC

#F4CCCC

#E06666



Population & Density


#FFFFCC

#C7E9B4

#7FCDBB

#41B6C4

#2C7FB8



Incidence


#FECCCC

#FF9999

#FF6666

#FF3333

#CC0000

#990000



Food / Non-food

Food


#3ECDB9

Non-food


#04BCFC


16 Regions and generic colours

These colours can be used for the 16 different regions or to assign more generic colours to a visualization that uses categories that are not defined in this visualization guide.

Ahafo


#4B644B

Ashanti


#7D96AF

Bono


#EBA07E

Bono East


#09979B

Central


#EA879C

Eastern


#E5B9AD

Greater Accra


#CC9B7A

Northern


#FDD835

North East


#0070C0

Oti


#AE2B47

Savannah


#F94240

Upper East


#903000

Upper West


#0F3B6C

Volta


#59A77F

Western


#FDAE6B

Western North


#B173A0


Missing value

If you want to assign a colour to missing values, you can use this grey tone.

Missing


#ADABAC

Using GSS theme

In the future, GSS will release its own package for data visualizations. For now it suffices to run this code to load all the R code that sets the right defaults (colours, fonts, etc.) to create plots in the GSS theme.

Show the code
# load packages
library(ggplot2)
library(tidyverse)
library(ggtext)
library(gghighlight)
library(sysfonts)
library(showtextdb)
library(showtext)
library(glue)
library(scales)
library(kableExtra)
library(patchwork)
library(forcats)
library(tidyverse)
library(ggbump)
library(reshape2)
library(plotly)


# Family name under which the Century Gothic font is registered; it is reused
# below for the geom_text() defaults.
GSS_font <- "century gothic bold"

# Register the font file under that family name.
font_add(GSS_font, "Font/Century Gothic.ttf")

# Make sure ggplot recognizes the font and render text at high resolution.
showtext_auto()
showtext::showtext_opts(dpi = 300)

# Default colours (outline and fill) for plots with multiple categories.
options(
  ggplot2.discrete.colour = c("#210D69", "#DB2E76", "#586889", "#227C42"),
  ggplot2.discrete.fill   = c("#210D69", "#DB2E76", "#586889", "#227C42")
)

# Default fill for plots with a single category.
update_geom_defaults("bar", list(fill = "#27A0CC"))
update_geom_defaults("col", list(fill = "#27A0CC"))

# Use the GSS font (and a slightly larger size) in geom_text().
update_geom_defaults("text", list(family = GSS_font, size = 4.5))
# Create the GSS theme: theme_gray() with the elements below replaced wholesale
# (via %+replace%). Takes no arguments and returns a ggplot2 theme object.
gssthemes <- function() {
  # Elements shared by several theme slots.
  white_box <- element_rect(fill = "white", color = NA)
  tick_label_margin <- margin(t = 5, r = 5, b = 0, l = 0, unit = "pt")

  gss_overrides <- theme(
    # Base text settings.
    text = element_text(
      family = "century gothic bold",
      colour = "black",
      size = 10
    ),
    plot.margin = margin(0.5, 0.3, 0.3, 0.3, "cm"),
    # plot.title =element_textbox_simple(family="century gothic bold", size=16,
    #                                    lineheight=1,
    #                                    margin=margin(b=10)),
    # plot.title.position="plot",
    # Caption: left-aligned markdown in gray, positioned relative to the plot.
    plot.caption = element_markdown(
      hjust = 0,
      color = "gray",
      lineheight = 1.5,
      margin = margin(t = 10)
    ),
    plot.caption.position = "plot",
    # Axis titles and tick labels.
    axis.title.y = element_text(color = "black", angle = 90, size = 10),
    axis.title.x = element_text(color = "black", size = 10),
    axis.text.x = element_text(
      color = "black", size = 10, vjust = 0, margin = tick_label_margin
    ),
    axis.text.y = element_text(
      color = "black", size = 10, hjust = 1, margin = tick_label_margin
    ),
    legend.text = element_text(color = "black", size = 10),
    # Only horizontal major grid lines are drawn.
    panel.grid.major.y = element_line(color = "gray", size = 0.25),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    # White backgrounds everywhere.
    panel.background = white_box,
    plot.background = white_box,
    legend.background = white_box,
    strip.background = white_box,
    legend.key = white_box,
    # Facet strip labels.
    strip.text = element_text(
      size = 20,
      margin = margin(t = 5, r = 0, b = 10, l = 0, unit = "pt")
    )
  )

  theme_gray() %+replace% gss_overrides
}

# Returns the range (min, max) of the "loose" extended breaks computed for x.
# Used as the `limits` function of a continuous scale so that the axis ends on
# a break at or above the highest data point.
nicelimits <- function(x) {
  loose_breaks <- scales::extended_breaks(only.loose = TRUE)
  range(loose_breaks(x))
}


# Colour palette definitions ----
# Single colours are stored as hex strings; *_scheme vectors bundle them in the
# order in which they are meant to be assigned to categories.

statscolours_color_scheme <- c("#382873", "#0168C8", "#00B050")

# Locality
national_color <- "#27A0CC"
urban_color <- "#871A5B"
rural_color <- "#22D0B6"
urbanrural_color_scheme <- c(national_color, urban_color, rural_color)

# Sex (the scheme starts with the national colour, then male, then female)
male_color <- "#206095"
female_color <- "#F66068"
malefemale_color_scheme <- c(national_color, male_color, female_color)

# Positive / negative
negative_color <- "#cc3333"
positive_color <- "#33cccc"

# Multi-step palettes
neutral_color_scheme <- c(
  "#002060", "#0070C0", "#00B0F0", "#8EA9DB", "#9BC2E6", "#FFFFCC"
)
posneg_color_scheme <- c(
  "#38761D", "#6AA84F", "#93C47D", "#F4CCCC", "#E06666", "#990000"
)
posneutralneg_color_scheme <- c(
  "#38761D", "#6AA84F", "#FFFFCC", "#E06666", "#990000"
)
population_color_scheme <- c(
  "#FFFFCC", "#C7E9B4", "#7FCDBB", "#41B6C4", "#2C7FB8"
)
incidence_color_scheme <- c(
  "#FECCCC", "#FF9999", "#FF6666", "#FF3333", "#CC0000", "#990000"
)

# Economic sectors
industry_color <- "#14607A"
agric_color <- "#07BB9E"
services_color <- "#F98B00"
economic_sectors <- c(industry_color, agric_color, services_color)

# Food / non-food
food_colour <- "#3ECDB9"
nonfood_colour <- "#04BCFC"

# Regions
Ahafo_color_code <- "#4B644B"
Ashanti_color_code <- "#7D96AF"
Bono_color_code <- "#EBA07E"
Bono_East_color_code <- "#09979B"
Central_color_code <- "#EA879C"
Eastern_color_code <- "#E5B9AD"
Greater_Accra_color_code <- "#CC9B7A"
Northern_color_code <- "#FDD835"
North_East_color_code <- "#0070C0"
# Backward-compatible alias: this region was originally defined without the
# "_color" part of the name used by every other region.
North_East_code <- North_East_color_code
Oti_color_code <- "#AE2B47"
Savannah_color_code <- "#F94240"
Upper_East_color_code <- "#903000"
Upper_West_color_code <- "#0F3B6C"
Volta_color_code <- "#59A77F"
Western_color_code <- "#FDAE6B"
Western_North_color_code <- "#B173A0"

Bar charts

A bar chart is an effective way to visually represent data that is categorical or discrete in nature (for example, different regions in Ghana). It is particularly useful when comparing values across different categories or groups. Bar charts are ideal for showing the distribution or frequency of data, as well as identifying trends or patterns over time. They can be used to display numerical data such as quantities, percentages, and proportions. Overall, a bar chart is appropriate when you want to easily compare different categories or groups and understand the relative differences between them.

example data

Show the code
# Fictitious example data: one row per region with the total number of chop
# bars and its urban / rural split.
n_chopbars_df <- tribble(
  ~region_number, ~region,         ~number_of_chop_bars, ~urban, ~rural,
  1,              "Western",       567,                  300,    267,
  2,              "Central",       907,                  600,    307,
  3,              "Greater Accra", 6990,                 4792,   2198,
  4,              "Volta",         278,                  78,     200,
  5,              "Eastern",       787,                  287,    500,
  6,              "Ashanti",       5621,                 621,    5000,
  7,              "Western North", 345,                  145,    200,
  8,              "Ahafo",         678,                  339,    339,
  9,              "Bono",          1268,                 600,    668,
  10,             "Bono East",     1268,                 1048,   220,
  11,             "Oti",           773,                  500,    273,
  12,             "Northern",      112,                  12,     100,
  13,             "Savannah",      1289,                 503,    786,
  14,             "North East",    378,                  142,    236,
  15,             "Upper East",    1279,                 326,    953,
  16,             "Upper West",    3782,                 3456,   326
)

# Render the data as a styled table inside a 400px-high scroll box.
n_chopbars_df %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "400px")
region_number region number_of_chop_bars urban rural
1 Western 567 300 267
2 Central 907 600 307
3 Greater Accra 6990 4792 2198
4 Volta 278 78 200
5 Eastern 787 287 500
6 Ashanti 5621 621 5000
7 Western North 345 145 200
8 Ahafo 678 339 339
9 Bono 1268 600 668
10 Bono East 1268 1048 220
11 Oti 773 500 273
12 Northern 112 12 100
13 Savannah 1289 503 786
14 North East 378 142 236
15 Upper East 1279 326 953
16 Upper West 3782 3456 326

One Color

Show the code
# Vertical bar chart in the single default fill colour.
n_chopbars_df %>%
  # Map regions to the x-axis and the number of chop bars to the y-axis.
  ggplot(aes(x = region, y = number_of_chop_bars)) +
  # Bars of equal width.
  geom_col(width = 0.8) +
  # GSS look and feel.
  gssthemes() +
  # y-axis:
  #  - expand: no multiplicative padding, additive padding of 1 unit
  #  - limits: nicelimits() ends the axis on a break above the largest value
  #  - labels: thousands separated by commas
  #  - breaks: evenly spaced "loose" extended breaks from the scales package
  scale_y_continuous(
    expand = c(0, 1),
    limits = nicelimits,
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  # Rotate the x-axis tick labels by 90 degrees.
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  # Axis titles: none for x, descriptive for y.
  labs(
    x = NULL,
    y = "number of chop bars"
  ) +
  # Do not clip drawing to the panel, so elements may extend beyond it.
  coord_cartesian(clip = "off")

One Color (Rotated) with labels

This example introduces coord_flip() instead of coord_cartesian() and brings back some theme elements to draw vertical instead of horizontal grid lines.

Show the code
# Horizontal bar chart in a single colour with a value label per bar.
n_chopbars_df %>%
  # Map regions to x and the number of chop bars to y (flipped further down).
  ggplot(aes(x = region, y = number_of_chop_bars)) +
  # Bars of equal width.
  geom_col(width = 0.8) +
  # Value label just past the end of each bar.
  geom_text(aes(label = number_of_chop_bars), hjust = -0.2) +
  # GSS look and feel.
  gssthemes() +
  # Value axis:
  #  - expand: no multiplicative padding, additive padding of 1 unit
  #  - limits: nicelimits() ends the axis on a break above the largest value
  #  - labels: thousands separated by commas
  #  - breaks: evenly spaced "loose" extended breaks from the scales package
  scale_y_continuous(
    expand = c(0, 1),
    limits = nicelimits,
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  # Axis titles: none for the regions, descriptive for the values.
  labs(
    x = NULL,
    y = "number of chop bars"
  ) +
  # Flip the axes; clipping is off so labels may extend beyond the panel.
  coord_flip(clip = "off") +
  # After flipping, grid lines should be vertical, and the value-axis labels
  # are vertically centred.
  theme(
    panel.grid.major.x = element_line(color = "gray", size = 0.25),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(vjust = 0.5)
  )

One Color (Rotated) with labels and ordered

You can use the reorder() function to order the axis labels.

Show the code
# Horizontal bar chart, ordered by value, with comma-formatted value labels.
n_chopbars_df %>%
  # reorder() sorts the regions by their number of chop bars.
  ggplot(
    mapping = aes(
      x = reorder(region, number_of_chop_bars),
      y = number_of_chop_bars
    )
  ) +
  # Bars of equal width.
  geom_col(width = 0.8) +
  # Value label just past the end of each bar, with a comma at the thousands.
  # (The stray trailing comma after `hjust` was removed: it passed an empty
  # argument to geom_text().)
  geom_text(
    mapping = aes(label = scales::comma(number_of_chop_bars)),
    hjust = -0.2
  ) +
  # GSS look and feel.
  gssthemes() +
  # Value axis:
  #  - expand: no multiplicative padding, additive padding of 1 unit
  #  - limits: nicelimits() ends the axis on a break above the largest value
  #  - labels: thousands separated by commas
  #  - breaks: evenly spaced "loose" extended breaks from the scales package
  scale_y_continuous(
    expand = c(0, 1),
    limits = nicelimits,
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  # Axis titles: none for the regions, descriptive for the values.
  labs(
    x = NULL,
    y = "number of chop bars"
  ) +
  # Flip the axes; clipping is off so labels may extend beyond the panel.
  coord_flip(clip = "off") +
  # After flipping, grid lines should be vertical, and the value-axis labels
  # are vertically centred.
  theme(
    panel.grid.major.x = element_line(color = "gray", size = 0.25),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(vjust = 0.5)
  )

Show different categories using color palettes (stacked)

If you want to show different categories, you can use the fill = categorical variable argument inside the aesthetics mapping. To do this, the data needs to be in a long format

example data (long format)

Show the code
# Long format: drop the regional total and stack the urban / rural columns into
# one `locality` key column and one `number_of_chop_bars` value column.
n_chopbars_df_longformat <- n_chopbars_df %>%
  select(-number_of_chop_bars) %>%
  pivot_longer(
    cols = -c(region_number, region),
    names_to = "locality",
    values_to = "number_of_chop_bars"
  )

# Render the reshaped data as a styled table inside a 400px-high scroll box.
n_chopbars_df_longformat %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "400px")
region_number region locality number_of_chop_bars
1 Western urban 300
1 Western rural 267
2 Central urban 600
2 Central rural 307
3 Greater Accra urban 4792
3 Greater Accra rural 2198
4 Volta urban 78
4 Volta rural 200
5 Eastern urban 287
5 Eastern rural 500
6 Ashanti urban 621
6 Ashanti rural 5000
7 Western North urban 145
7 Western North rural 200
8 Ahafo urban 339
8 Ahafo rural 339
9 Bono urban 600
9 Bono rural 668
10 Bono East urban 1048
10 Bono East rural 220
11 Oti urban 500
11 Oti rural 273
12 Northern urban 12
12 Northern rural 100
13 Savannah urban 503
13 Savannah rural 786
14 North East urban 142
14 North East rural 236
15 Upper East urban 326
15 Upper East rural 953
16 Upper West urban 3456
16 Upper West rural 326
Show the code
# Stacked horizontal bars: one bar per region, filled by locality.
n_chopbars_df_longformat %>%
  ggplot(
    aes(
      x = reorder(region, number_of_chop_bars),
      y = number_of_chop_bars,
      fill = locality
    )
  ) +
  geom_col(width = 0.8) +
  gssthemes() +
  # Same value-axis set-up as the single-colour charts.
  scale_y_continuous(
    expand = c(0, 1),
    limits = nicelimits,
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  labs(
    x = NULL,
    y = "number of chop bars"
  ) +
  coord_flip(clip = "off") +
  # Assign the guideline colours to the two localities by name.
  scale_fill_manual(
    values = c(
      urban = urban_color,
      rural = rural_color
    )
  ) +
  # Vertical grid lines only; vertically centred value-axis labels.
  theme(
    panel.grid.major.x = element_line(color = "gray", size = 0.25),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(vjust = 0.5)
  )

Show different categories using color palettes (dodged)

Instead of having the categories stacked, you can put them next to each other (dodged).

Show the code
# Dodged horizontal bars: urban and rural bars side by side for each region.
n_chopbars_df_longformat %>%
  ggplot(
    aes(
      x = reorder(region, number_of_chop_bars),
      y = number_of_chop_bars,
      fill = locality
    )
  ) +
  geom_col(width = 0.8, position = "dodge") +
  # Labels are dodged by the same width as the bars so that they line up.
  geom_text(
    aes(label = scales::comma(number_of_chop_bars)),
    hjust = -0.2,
    position = position_dodge(width = 0.8)
  ) +
  gssthemes() +
  # No padding on the value axis here.
  scale_y_continuous(
    expand = c(0, 0),
    limits = nicelimits,
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  labs(
    x = NULL,
    y = "number of chop bars"
  ) +
  coord_flip(clip = "off") +
  # Assign the guideline colours to the two localities by name.
  scale_fill_manual(
    values = c(
      urban = urban_color,
      rural = rural_color
    )
  ) +
  # Vertical grid lines only; vertically centred value-axis labels.
  theme(
    panel.grid.major.x = element_line(color = "gray", size = 0.25),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(vjust = 0.5)
  )

Bar chart with highlighted category

Sometimes you might want to highlight a single category in a bar chart (for example the National/Ghana average). To also highlight the axis label, you need to use the ggtext package (element_markdown()). It is recommended to give the highlight a hue similar to the other colours. This can be done using the muted() function from the scales package.

Show the code
# Append a "Ghana" row (region number 0) holding the mean of each numeric
# column across the 16 regions.
n_chopbars_df_with_national <- add_row(
  n_chopbars_df,
  region_number = 0,
  region = "Ghana",
  number_of_chop_bars = mean(n_chopbars_df[["number_of_chop_bars"]]),
  urban = mean(n_chopbars_df[["urban"]]),
  rural = mean(n_chopbars_df[["rural"]])
)
Show the code
# Wrap the elements of `x` that match `pat` in a bold HTML tag.
#
# x:      vector of labels.
# pat:    regular expression passed to grepl(); matching labels are wrapped.
# color:  CSS colour used for the wrapped labels.
# family: CSS font family used for the wrapped labels (empty by default).
#
# Returns a vector the same length as `x`; labels that do not match are
# returned unchanged.
highlight <- function(x, pat, color = "black", family = "") {
  is_match <- grepl(pat, x)
  wrapped <- sprintf(
    "<b style='font-family:%s; color:%s'>%s</b>",
    family, color, x
  )
  ifelse(is_match, wrapped, x)
}

# Horizontal bar chart with the "Ghana" bar and its axis label emphasised.
n_chopbars_df_with_national %>%
  # Logical flag marking the row to emphasise.
  mutate(highlight = region == "Ghana") %>%
  # Order the bars by value; `fill` refers to the flag column created above.
  ggplot(
    aes(
      x = reorder(region, number_of_chop_bars),
      y = number_of_chop_bars,
      fill = highlight
    )
  ) +
  geom_col(width = 0.8) +
  geom_text(aes(label = scales::comma(number_of_chop_bars)), hjust = -0.2) +
  gssthemes() +
  scale_y_continuous(
    expand = c(0, 0),
    limits = nicelimits,
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  # Wrap the "Ghana" axis label in a bold HTML tag via the highlight() helper.
  scale_x_discrete(labels = function(x) highlight(x, "Ghana", "black")) +
  # Regular bars use the national colour, the emphasised bar a muted version
  # of it; the fill legend is hidden.
  scale_fill_manual(
    values = c(
      `FALSE` = national_color,
      `TRUE` = scales::muted(national_color)
    ),
    guide = "none"
  ) +
  labs(
    x = NULL,
    y = "number of chop bars"
  ) +
  coord_flip(clip = "off") +
  theme(
    panel.grid.major.x = element_line(color = "gray", size = 0.25),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(vjust = 0.5),
    # The axis text must be rendered as markdown for the bold tag to work.
    axis.text.y = element_markdown()
  )

Bar chart with long labels

To allow for sufficient plotting space, it is a good idea to add line breaks to long value labels on the axes. You can use the str_wrap() function to add line breaks.

example data (long labels)

Show the code
# Fictitious example data with long category labels.
n_chopbars_df_long_labels <- tribble(
  ~question, ~count,
  "number of households eating at chopbars every day", 678,
  "number of male headed households eating at chopbars once a week", 1267,
  "number of female headed households eating at chop bars once a week", 1876
)

# Render the data as a styled table.
n_chopbars_df_long_labels %>%
  kable() %>%
  kable_styling()
question count
number of households eating at chopbars every day 678
number of male headed households eating at chopbars once a week 1267
number of female headed households eating at chop bars once a week 1876
Show the code
# Horizontal bar chart whose long category labels are wrapped onto several
# lines.
n_chopbars_df_long_labels %>%
  # str_wrap() inserts line breaks so that each label line is about 20
  # characters wide.
  ggplot(mapping = aes(x = str_wrap(question, 20), y = count)) +
  geom_col(width = 0.8) +
  # Comma-formatted value label just past the end of each bar.
  geom_text(mapping = aes(label = scales::comma(count)), hjust = -0.2) +
  gssthemes() +
  scale_y_continuous(
    expand = c(0, 1),
    limits = nicelimits,
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  # The plotted `count` column holds numbers of households (see the question
  # labels), so the axis is titled accordingly.
  labs(
    x = NULL,
    y = "number of households"
  ) +
  coord_flip(clip = "off") +
  # Vertical grid lines only.
  theme(
    panel.grid.major.x = element_line(color = "gray", size = 0.25),
    panel.grid.major.y = element_blank()
  )

Diverging Bar chart

A diverging bar chart is a type of data visualization that displays values of a quantitative variable across two or more categories, where the bars are centered on a common baseline or a central axis. This type of chart is particularly useful for comparing differences in values that can have both positive and negative magnitudes, or when illustrating a relationship between two opposing factors or sentiments.

In a diverging bar chart, the bars extend in opposite directions from the central axis, creating a symmetrical pattern. This layout makes it easy to see the differences between categories and to identify patterns and trends within the data.

When choosing a diverging bar chart, make sure it is appropriate for your dataset and the type of information you want to convey. It is important to note that diverging bar charts may not be suitable for all types of data, especially if the differences between categories are not symmetrical or if the data has no clear central point. In such cases, other visualization methods, like standard bar charts or line charts, might be more suitable.

example data (positive and negative values)

Show the code
# Fictitious example data: relative change in the number of chop bars per
# region, stored as a fraction (0.67 = 67%).
change_in_chop_bars_df <- tribble(
  ~region_number, ~region,         ~change,
  1,              "Western",       -0.35,
  2,              "Central",       -0.26,
  3,              "Greater Accra", 0.67,
  4,              "Volta",         -0.18,
  5,              "Eastern",       0.47,
  6,              "Ashanti",       0.56,
  7,              "Western North", -0.2,
  8,              "Ahafo",         0.61,
  9,              "Bono",          0.12,
  10,             "Bono East",     0.01,
  11,             "Oti",           0.07,
  12,             "Northern",      0.11,
  13,             "Savannah",      0.12,
  14,             "North East",    -0.31,
  15,             "Upper East",    0,
  16,             "Upper West",    0.265
)

# Render the data as a styled table inside a 400px-high scroll box.
change_in_chop_bars_df %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "400px")
region_number region change
1 Western -0.350
2 Central -0.260
3 Greater Accra 0.670
4 Volta -0.180
5 Eastern 0.470
6 Ashanti 0.560
7 Western North -0.200
8 Ahafo 0.610
9 Bono 0.120
10 Bono East 0.010
11 Oti 0.070
12 Northern 0.110
13 Savannah 0.120
14 North East -0.310
15 Upper East 0.000
16 Upper West 0.265
Show the code
# Diverging horizontal bar chart: bars extend left or right from zero and are
# coloured by the sign of the change.
change_in_chop_bars_df %>%
  # Strictly positive changes are "positive"; everything else (including 0)
  # is "negative".
  mutate(pos_neg = ifelse(change > 0, "positive", "negative")) %>%
  ggplot(
    aes(
      x = reorder(region, change),
      y = change,
      fill = pos_neg
    )
  ) +
  geom_col() +
  gssthemes() +
  scale_fill_manual(
    values = c(
      positive = positive_color,
      negative = negative_color
    )
  ) +
  # Show the fractions as percentages.
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = NULL,
    y = "change in number\nof chop bars"
  ) +
  coord_flip(clip = "off") +
  # Vertical grid lines only, no legend.
  theme(
    panel.grid.major.x = element_line(color = "gray", size = 0.25),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(vjust = 0.5),
    legend.position = "none"
  ) +
  # Dashed reference line at zero.
  geom_hline(yintercept = 0, linetype = "dashed", color = "black")

Bar chart with hidden labels

Sometimes, you might want to hide certain text labels. For example, if the value is too small. In the example below, any value under 12% is hidden.

Show the code
# Stacked share chart per region; labels for shares below 12% are left blank.
n_chopbars_df_longformat %>%
  # Share of each locality within its region.
  group_by(region) %>%
  mutate(percentage = number_of_chop_bars / sum(number_of_chop_bars)) %>%
  ungroup() %>%
  ggplot(aes(x = region, y = percentage, fill = locality)) +
  geom_col() +
  coord_flip(expand = FALSE, clip = "off") +
  scale_y_continuous(labels = scales::percent) +
  geom_text(
    aes(
      # Empty label for small shares, otherwise the share with one decimal.
      label = ifelse(
        percentage < .12,
        "",
        paste0(format(round(percentage * 100, 1), nsmall = 1), "%")
      ),
      # Fixed y values per locality; they are passed through the "stack"
      # position below rather than following the bar heights.
      y = ifelse(locality == "urban", 0.04, 0.91)
    ),
    size = 3.5,
    position = "stack"
  ) +
  gssthemes() +
  labs(
    x = NULL,
    y = "share of chop bars"
  ) +
  # NOTE(review): `labels` is unnamed and therefore matched by position;
  # presumably the breaks are ordered rural, urban -- confirm if the locality
  # values ever change.
  scale_fill_manual(
    values = c(
      urban = urban_color,
      rural = rural_color
    ),
    labels = c("Rural", "Urban")
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  # Vertical grid lines only; vertically centred value-axis labels.
  theme(
    panel.grid.major.x = element_line(color = "gray", size = 0.25),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(vjust = 0.5)
  )

Filled bar chart with from positive to negative

You can use a filled bar chart to show relative shares within a group. This example shows the percentage of different opinions.

example data (sentiment data)

Show the code
# Fictitious example data: opinion on chop bars in percent, one row per
# location and one column per sentiment.
sentiment_chop_bars <- tribble(
  ~location,  ~`very positive`, ~`positive`, ~`neutral`, ~`negative`, ~`very negative`,
  "urban",    15.2,             17.3,        50.2,       8,           9.3,
  "rural",    25.2,             34.2,        31.2,       6,           3.4,
  "national", 20.2,             25.75,       40.7,       7,           6.35
)

# Render the data as a styled table.
sentiment_chop_bars %>%
  kable() %>%
  kable_styling()
location very positive positive neutral negative very negative
urban 15.2 17.30 50.2 8 9.30
rural 25.2 34.20 31.2 6 3.40
national 20.2 25.75 40.7 7 6.35
Show the code
# Stacked horizontal bars showing the split of opinions per location.
sentiment_chop_bars %>%
  # One row per location / sentiment pair (values land in `value`).
  pivot_longer(-location, names_to = "sentiment") %>%
  mutate(
    # Percent values to fractions.
    percent = value / 100,
    # Fix the order of the sentiment categories, most positive first.
    sentiment = factor(
      sentiment,
      levels = c(
        "very positive", "positive", "neutral", "negative", "very negative"
      )
    )
  ) %>%
  ggplot(aes(x = location, y = percent, fill = sentiment)) +
  geom_col(width = 0.8, position = "stack") +
  gssthemes() +
  scale_y_continuous(
    labels = scales::percent,
    expand = c(0, 0)
  ) +
  labs(
    x = NULL,
    y = "percentage",
    fill = "opinion on chop bars"
  ) +
  coord_flip(clip = "off") +
  # Five-step positive / neutral / negative palette.
  scale_fill_manual(values = posneutralneg_color_scheme) +
  # Vertical grid lines only; legend above the plot.
  theme(
    panel.grid.major.x = element_line(color = "gray", size = 0.25),
    panel.grid.major.y = element_blank(),
    legend.position = "top"
  ) +
  # Two-row legend, filled row by row, with its title on top.
  guides(
    fill = guide_legend(nrow = 2, byrow = TRUE, title.position = "top")
  )

multiple plots next to each other using facet_wrap()

R’s faceting system is a powerful way to make “small multiples”.

Show the code
# Small multiples: one panel of horizontal bars per locality.
n_chopbars_df_longformat %>%
  ggplot(
    mapping = aes(
      x = reorder(region, number_of_chop_bars),
      y = number_of_chop_bars,
      fill = locality
    )
  ) +
  geom_col(width = 0.8, position = "dodge") +
  # Labels are dodged by the same width as the bars so that they line up.
  geom_text(
    mapping = aes(label = scales::comma(number_of_chop_bars)),
    hjust = -0.2,
    position = position_dodge(width = 0.8)
  ) +
  gssthemes() +
  # Fixed value axis from 0 to 7,000 without padding. The expansion constants
  # are now given explicitly as c(0, 0); the previous c(0, NA) passed NA as a
  # constant.
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 7000),
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  labs(
    x = NULL,
    y = "number of chop bars"
  ) +
  coord_flip(clip = "off") +
  # Assign the guideline colours to the two localities by name.
  scale_fill_manual(
    values = c(
      urban = urban_color,
      rural = rural_color
    )
  ) +
  # Vertical grid lines only; the legend is hidden.
  theme(
    panel.grid.major.x = element_line(color = "gray", size = 0.25),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  # Use the facet_wrap function to make small multiples, one per locality.
  facet_wrap(~locality)

multiple plots next to each other with separate bar legends

Sometimes using facet_wrap is not sufficient, especially if you want to add different legends to sub-plots, but still want to align the axes of different plots into a single plot. To do this you can create 3 separate plots and merge them using the patchwork package.

example data (long format with quarters)

Show the code
# Fictitious example data: number of chop bars per region (national total plus
# urban / rural split) for two quarters.
n_chopbars_df_region_quarters <- tribble(
  ~region_number, ~region,         ~national, ~urban, ~rural, ~quarter,
  # Q1
  1,              "Western",       567,       300,    267,    "Q1",
  2,              "Central",       907,       600,    307,    "Q1",
  3,              "Greater Accra", 6990,      4792,   2198,   "Q1",
  4,              "Volta",         278,       78,     200,    "Q1",
  5,              "Eastern",       787,       287,    500,    "Q1",
  6,              "Ashanti",       5621,      621,    5000,   "Q1",
  7,              "Western North", 345,       145,    200,    "Q1",
  8,              "Ahafo",         678,       339,    339,    "Q1",
  9,              "Bono",          1268,      600,    668,    "Q1",
  10,             "Bono East",     1268,      1048,   220,    "Q1",
  11,             "Oti",           773,       500,    273,    "Q1",
  12,             "Northern",      112,       12,     100,    "Q1",
  13,             "Savannah",      1289,      503,    786,    "Q1",
  14,             "North East",    378,       142,    236,    "Q1",
  15,             "Upper East",    1279,      326,    953,    "Q1",
  16,             "Upper West",    3782,      3456,   326,    "Q1",
  # Q2
  1,              "Western",       602,       302,    300,    "Q2",
  2,              "Central",       1000,      600,    400,    "Q2",
  3,              "Greater Accra", 7990,      4792,   3198,   "Q2",
  4,              "Volta",         346,       93,     253,    "Q2",
  5,              "Eastern",       887,       387,    500,    "Q2",
  6,              "Ashanti",       6000,      2000,   4000,   "Q2",
  7,              "Western North", 445,       195,    250,    "Q2",
  8,              "Ahafo",         701,       339,    362,    "Q2",
  9,              "Bono",          1444,      722,    722,    "Q2",
  10,             "Bono East",     1368,      1048,   290,    "Q2",
  11,             "Oti",           912,       534,    378,    "Q2",
  12,             "Northern",      122,       17,     105,    "Q2",
  13,             "Savannah",      1289,      503,    786,    "Q2",
  14,             "North East",    534,       182,    352,    "Q2",
  15,             "Upper East",    1510,      400,    1110,   "Q2",
  16,             "Upper West",    3991,      3496,   495,    "Q2"
)

# Render the data as a styled table inside a 400px-high scroll box.
n_chopbars_df_region_quarters %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "400px")
region_number region national urban rural quarter
1 Western 567 300 267 Q1
2 Central 907 600 307 Q1
3 Greater Accra 6990 4792 2198 Q1
4 Volta 278 78 200 Q1
5 Eastern 787 287 500 Q1
6 Ashanti 5621 621 5000 Q1
7 Western North 345 145 200 Q1
8 Ahafo 678 339 339 Q1
9 Bono 1268 600 668 Q1
10 Bono East 1268 1048 220 Q1
11 Oti 773 500 273 Q1
12 Northern 112 12 100 Q1
13 Savannah 1289 503 786 Q1
14 North East 378 142 236 Q1
15 Upper East 1279 326 953 Q1
16 Upper West 3782 3456 326 Q1
1 Western 602 302 300 Q2
2 Central 1000 600 400 Q2
3 Greater Accra 7990 4792 3198 Q2
4 Volta 346 93 253 Q2
5 Eastern 887 387 500 Q2
6 Ashanti 6000 2000 4000 Q2
7 Western North 445 195 250 Q2
8 Ahafo 701 339 362 Q2
9 Bono 1444 722 722 Q2
10 Bono East 1368 1048 290 Q2
11 Oti 912 534 378 Q2
12 Northern 122 17 105 Q2
13 Savannah 1289 503 786 Q2
14 North East 534 182 352 Q2
15 Upper East 1510 400 1110 Q2
16 Upper West 3991 3496 495 Q2
Show the code
# Reshape to long format: every column except region_number, region and
# quarter is stacked into a `locality` / `number_of_chop_bars` pair, so each
# locality can be filtered out and plotted on its own.
n_chopbars_df_region_quarters_longformat <- pivot_longer(
  n_chopbars_df_region_quarters,
  cols = -c(region_number, region, quarter),
  names_to = "locality",
  values_to = "number_of_chop_bars"
)


# National panel: horizontal dodged bars per region, one bar per quarter.
# Both quarters use the national colour and are told apart by transparency.
p_national <- n_chopbars_df_region_quarters_longformat %>%
  filter(locality == "national") %>%
  ggplot(mapping = aes(
    # descending region_number puts region 1 at the top once the axes are flipped
    x = reorder(region, desc(region_number)),
    y = number_of_chop_bars
  )) +
  geom_col(
    mapping = aes(alpha = reorder(quarter, desc(quarter))),
    width = 0.8,
    position = position_dodge(width = 0.8),
    fill = national_color
  ) +
  geom_text(
    mapping = aes(
      label = scales::comma(number_of_chop_bars),
      # group the labels by quarter so they dodge to the same positions as the bars
      group = reorder(quarter, desc(quarter))
    ),
    hjust = -0.2,
    position = position_dodge(width = 0.8)
  ) +
  gssthemes() +
  scale_y_continuous(
    limits = c(0, 10000),
    labels = scales::comma
  ) +
  scale_alpha_manual(
    values = seq(from = 0.5, to = 1, length.out = 2),
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(
    x = NULL,
    alpha = NULL,
    y = "number of chop bars",
    title = "National"
  ) +
  # clip = "off" allows the value labels to be drawn outside the panel area
  coord_flip(clip = "off") +
  theme(
    # line widths are set with `linewidth`; `size` is deprecated for lines
    panel.grid.major.x = element_line(color = "gray", linewidth = 0.25),
    panel.grid.major.y = element_blank(),
    legend.position = "top",
    axis.text.x = element_text(vjust = 0.5),
    # centre the title and subtitle
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )



# Urban panel: horizontal dodged bars per region, one bar per quarter.
# The region labels and ticks on the (flipped) y axis are blanked in theme().
p_urban <- n_chopbars_df_region_quarters_longformat %>%
  filter(locality == "urban") %>%
  ggplot(mapping = aes(
    x = reorder(region, desc(region_number)),
    y = number_of_chop_bars,
    # grouping by quarter lets bars and labels dodge to matching positions
    group = reorder(quarter, desc(quarter))
  )) +
  geom_col(
    mapping = aes(alpha = reorder(quarter, desc(quarter))),
    width = 0.8,
    position = "dodge",
    fill = urban_color
  ) +
  geom_text(
    mapping = aes(label = scales::comma(number_of_chop_bars)),
    hjust = -0.2,
    position = position_dodge(width = 0.8)
  ) +
  gssthemes() +
  scale_y_continuous(
    limits = c(0, 10000),
    labels = scales::comma
  ) +
  scale_alpha_manual(
    values = seq(from = 0.5, to = 1, length.out = 2),
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(
    x = NULL,
    alpha = NULL,
    y = "number of chop bars",
    title = "Urban"
  ) +
  # clip = "off" allows the value labels to be drawn outside the panel area
  coord_flip(clip = "off") +
  theme(
    # move the title text and subtitle text to the middle
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    # line widths are set with `linewidth`; `size` is deprecated for lines
    panel.grid.major.x = element_line(color = "gray", linewidth = 0.25),
    panel.grid.major.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "top",
    axis.text.y = element_blank()
  )

# Rural panel: horizontal dodged bars per region, one bar per quarter.
# The region labels and ticks on the (flipped) y axis are blanked in theme().
p_rural <- n_chopbars_df_region_quarters_longformat %>%
  filter(locality == "rural") %>%
  ggplot(mapping = aes(
    x = reorder(region, desc(region_number)),
    y = number_of_chop_bars,
    # grouping by quarter lets bars and labels dodge to matching positions
    group = reorder(quarter, desc(quarter))
  )) +
  geom_col(
    mapping = aes(alpha = reorder(quarter, desc(quarter))),
    width = 0.8,
    position = "dodge",
    fill = rural_color
  ) +
  geom_text(
    mapping = aes(label = scales::comma(number_of_chop_bars)),
    hjust = -0.2,
    position = position_dodge(width = 0.8)
  ) +
  gssthemes() +
  scale_y_continuous(
    limits = c(0, 10000),
    labels = scales::comma
  ) +
  scale_alpha_manual(
    values = seq(from = 0.5, to = 1, length.out = 2),
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(
    x = NULL,
    alpha = NULL,
    y = "number of chop bars",
    title = "Rural"
  ) +
  # clip = "off" allows the value labels to be drawn outside the panel area
  coord_flip(clip = "off") +
  theme(
    # move the title text and subtitle text to the middle
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    # line widths are set with `linewidth`; `size` is deprecated for lines
    panel.grid.major.x = element_line(color = "gray", linewidth = 0.25),
    panel.grid.major.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "top",
    axis.text.y = element_blank()
  )


# Combine the three panels into a single figure using the `+` operator
p_national + p_urban + p_rural

Bar chart with a single category highlighted on top

With a bit of effort, a single category can also be shown on its own, separated from the other bars. In this case you can use facet_grid().

Show the code
n_chopbars_df_with_national %>%
  # flag the "Ghana" row so it gets its own fill colour and its own facet
  mutate(highlight = region == "Ghana") %>%
  # Create a ggplot object and set the aesthetic mappings
  ggplot(mapping = aes(
    # use the reorder function to order the bars by their value
    x = reorder(region, number_of_chop_bars),
    y = number_of_chop_bars,
    fill = highlight
  )) +
  geom_col(width = 0.8) +
  geom_text(
    mapping = aes(label = scales::comma(number_of_chop_bars)),
    hjust = -0.2
  ) +
  gssthemes() +
  scale_y_continuous(
    expand = c(0, 0),
    limits = nicelimits,
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  # highlight() is defined outside this chunk; the axis text is rendered with
  # element_markdown() below so that the label it returns is displayed properly
  scale_x_discrete(labels = function(x) highlight(x, "Ghana", "black")) +
  scale_fill_manual(
    values = c(
      `FALSE` = national_color,
      `TRUE` = scales::muted(national_color)
    ),
    guide = "none"
  ) +
  labs(
    x = NULL,
    y = "number of chop bars"
  ) +
  # clip = "off" allows the value labels to be drawn outside the panel area
  coord_flip(clip = "off") +
  theme(
    # line widths are set with `linewidth`; `size` is deprecated for lines
    panel.grid.major.x = element_line(color = "gray", linewidth = 0.25),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(vjust = 0.5),
    strip.text = element_blank(),
    # to make the highlighted label work, the y axis text must be in markdown format
    axis.text.y = element_markdown()
  ) +
  # one facet per highlight value; fct_rev() puts the highlighted row first,
  # and free scales/space size each facet to the bars it contains
  facet_grid(
    fct_rev(as.factor(highlight)) ~ .,
    scales = "free",
    space = "free_y"
  )

Population pyramid

An illustration of the age and gender distribution of a population is called a population pyramid. It is widely used to illustrate both the current trend in population growth and a population’s makeup. In most cases, the chart is divided into two halves, one for males and the other for females. The age groups are usually displayed along the vertical axis, with the youngest age group at the bottom and the oldest at the top. To make sure that ggplot orders the age groups by age rather than alphabetically, you need to make the age grouping a factor variable with its levels in the desired order. Because one gender is plotted with negative values, you can use scale_y_continuous(labels = abs) to show the axis labels at the base as absolute values.

example data (Population pyramid)

Show the code
# Sample data
# Age bands in display order (youngest first). The same vector supplies both
# the column values and the factor levels, so the two cannot drift apart.
age_groups <- c(
  "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39",
  "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75+"
)

# One row per age band and gender; age_group is a factor so plots keep the
# order above instead of sorting the labels alphabetically.
age_pyramid_df <- data.frame(
  age_group = factor(rep(age_groups, 2), levels = age_groups),
  gender = rep(c("Male", "Female"), each = length(age_groups)),
  count = c(
    100, 95, 90, 85, 80, 75, 70, 65, 60, 55, 50, 45, 40, 35, 30, 25,
    105, 100, 95, 90, 85, 80, 75, 70, 65, 60, 55, 50, 45, 40, 35, 30
  )
)


# Print the sample data as a styled table inside a scroll box that is 400px high
age_pyramid_df %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "400px")
age_group gender count
0-4 Male 100
5-9 Male 95
10-14 Male 90
15-19 Male 85
20-24 Male 80
25-29 Male 75
30-34 Male 70
35-39 Male 65
40-44 Male 60
45-49 Male 55
50-54 Male 50
55-59 Male 45
60-64 Male 40
65-69 Male 35
70-74 Male 30
75+ Male 25
0-4 Female 105
5-9 Female 100
10-14 Female 95
15-19 Female 90
20-24 Female 85
25-29 Female 80
30-34 Female 75
35-39 Female 70
40-44 Female 65
45-49 Female 60
50-54 Female 55
55-59 Female 50
60-64 Female 45
65-69 Female 40
70-74 Female 35
75+ Female 30
Show the code
# Population pyramid: male counts are negated so that, after coord_flip(),
# male bars extend to one side of zero and female bars to the other.
age_pyramid_df %>%
  ggplot(mapping = aes(
    x = age_group,
    y = ifelse(gender == "Male", -count, count),
    fill = gender
  )) +
  geom_col() +
  # show the negated male values as positive numbers on the axis
  scale_y_continuous(labels = abs) +
  scale_fill_manual(
    values = c(
      "Male" = male_color,
      "Female" = female_color
    )
  ) +
  coord_flip() +
  labs(
    x = "age group",
    y = "count"
  ) +
  gssthemes()

Line Charts

A line chart is a graphical representation used to display continuous data over a period of time or across one or multiple categories. A user should use a line chart when they want to show trends, patterns, or changes over time, especially for a large number of data points or continuous data.

example data (line)

Show the code
# Example data for the line charts: one `elder` value per year.
# `year` is stored as a factor.
line_chart_df <- tribble(
  ~year, ~elder,
  1960,  213477,
  1984,  493359,
  2000,  1363410,
  2010,  1640220,
  2021,  1993300
) %>%
  mutate(year = factor(year))

# Print the example data as a styled table
line_chart_df %>%
  kable() %>%
  kable_styling()
year elder
1960 213477
1984 493359
2000 1363410
2010 1640220
2021 1993300
Show the code
line_chart_df %>%
  # Convert the factor back to numbers row by row. as.character() is needed
  # first: as.numeric() on a factor gives the level codes, and levels(year)
  # only lines up with the rows while every level occurs once, in sorted order.
  mutate(year = as.numeric(as.character(year))) %>%
  ggplot(aes(x = year, y = elder)) +
  geom_line(color = national_color, linewidth = 1.5) +
  geom_point(color = national_color, size = 3) +
  # put an axis break at each year present in the data
  scale_x_continuous(breaks = as.numeric(levels(line_chart_df$year))) +
  gssthemes()

Line chart multiple categories

example data (Line Chart with 2 categories)

Show the code
# Example data with two series (Male, Female) per year, reshaped to long
# format: one row per year and sex, with the value in `number`.
line_chart_df_by_sex <- tribble(
  ~year, ~Male, ~Female,
  1960,  4572,  2562,
  1984,  5572,  4572,
  2000,  6787,  5887,
  2010,  7211,  6500,
  2021,  8000,  7900
) %>%
  pivot_longer(
    cols = -c(year),
    names_to = "sex",
    values_to = "number"
  )

# Print the example data as a styled table inside a scroll box that is 400px high
line_chart_df_by_sex %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "400px")
year sex number
1960 Male 4572
1960 Female 2562
1984 Male 5572
1984 Female 4572
2000 Male 6787
2000 Female 5887
2010 Male 7211
2010 Female 6500
2021 Male 8000
2021 Female 7900
Show the code
# Line chart with one coloured line per sex
line_chart_df_by_sex %>%
  ggplot(aes(x = year, y = number, color = sex)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 3) +
  scale_color_manual(
    values = c(
      "Male" = male_color,
      "Female" = female_color
    )
  ) +
  # take the axis breaks from the data frame that is actually being plotted
  scale_x_continuous(breaks = unique(line_chart_df_by_sex$year)) +
  # a single y scale: adding a second one would replace the first and make
  # ggplot2 print a "Scale for y is already present" message
  scale_y_continuous(
    expand = c(0, 0),
    limits = nicelimits,
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  gssthemes() +
  theme(legend.key = element_blank())

Line chart multiple categories with labels

To add value labels to a line chart, you can use the geom_text() function or the geom_text_repel() function from the ggrepel package if you want to make sure the labels do not overlap. You can use show.legend=FALSE to make sure the legend does not contain text.

Show the code
# Line chart per sex with the value printed at every point
line_chart_df_by_sex %>%
  ggplot(aes(x = year, y = number, color = sex)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 3) +
  geom_text(
    aes(
      label = number,
      # Male labels go above the point and Female labels below it. Mapping
      # vjust inside aes() ties it to each row of the plotted data, so it no
      # longer depends on the row order of a separately referenced data frame.
      vjust = ifelse(sex == "Male", -1, 2)
    ),
    # keep the text glyph out of the legend
    show.legend = FALSE
  ) +
  scale_color_manual(
    values = c(
      "Male" = male_color,
      "Female" = female_color
    )
  ) +
  scale_x_continuous(
    # take the axis breaks from the data frame that is actually being plotted
    breaks = unique(line_chart_df_by_sex$year),
    limits = c(1960, 2030)
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 9000),
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  gssthemes() +
  theme(legend.key = element_blank())

Line chart with group labels showing at end of line

Instead of a legend, you can also add the line labels to the end of the lines

Show the code
library(ggrepel)
line_chart_df_by_sex %>%
  # change the sex label so that it only shows up at the end
  # by making it NA for all other years
  mutate(sex_label = ifelse(year == max(year), as.character(sex), NA_character_)) %>%
  ggplot(aes(x = year, y = number, color = sex)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 3) +
  geom_text_repel(
    aes(label = sex_label),
    # hjust is given exactly once; passing it twice with different values
    # leaves it unclear which one takes effect
    hjust = 0,
    # only allow labels to the right of the last year in the data
    xlim = c(max(line_chart_df_by_sex$year) + 5, NA),
    # labels may only be moved vertically to avoid overlaps
    direction = "y",
    segment.curvature = -1e-20,
    segment.angle = 90,
    min.segment.length = 0.2,
    # rows with an NA label are dropped on purpose, so do not warn about them
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  scale_color_manual(
    values = c(
      "Male" = male_color,
      "Female" = female_color
    )
  ) +
  scale_x_continuous(
    breaks = unique(line_chart_df_by_sex$year),
    # extend the axis past the last year to leave room for the labels
    limits = c(1960, max(line_chart_df_by_sex$year) + 20)
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 9000),
    labels = scales::comma,
    breaks = scales::extended_breaks(only.loose = TRUE)
  ) +
  gssthemes() +
  # the lines are labelled directly, so the legend is not needed
  theme(legend.position = "none")

step chart

A step chart is a suitable choice over a line chart when you want to represent data that changes at specific intervals and remains constant between those intervals. It is particularly useful for visualizing data with discrete jumps or where the value holds steady until the next change occurs.

Step charts are often used in scenarios where data has a clear start and end point for each period, such as price changes, tax rate adjustments, or policy changes. By using a step chart, you can emphasize the moments when the changes happen and highlight the periods of stability between them, making it easier to identify and analyze the impacts of these changes. In contrast, a line chart would connect the data points directly, which might imply a continuous transition between values, potentially misrepresenting the nature of the data.

Show the code
line_chart_df %>%
  # Convert the factor back to numbers row by row. as.character() is needed
  # first: as.numeric() on a factor gives the level codes, and levels(year)
  # only lines up with the rows while every level occurs once, in sorted order.
  mutate(year = as.numeric(as.character(year))) %>%
  ggplot(aes(x = year, y = elder)) +
  geom_step(color = national_color, linewidth = 1.5) +
  # put an axis break at each year present in the data
  scale_x_continuous(breaks = as.numeric(levels(line_chart_df$year))) +
  gssthemes() +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 2500000),
    breaks = scales::extended_breaks(only.loose = TRUE),
    labels = scales::comma
  )

Smoothed line chart

In some cases, you might want to use a smoothed line (that still goes through all the points). In this case you can use geom_xspline(spline_shape = -0.5) from the ggalt package

Show the code
library(ggalt)
line_chart_df %>%
  # Convert the factor back to numbers row by row. as.character() is needed
  # first: as.numeric() on a factor gives the level codes, and levels(year)
  # only lines up with the rows while every level occurs once, in sorted order.
  mutate(year = as.numeric(as.character(year))) %>%
  ggplot(aes(x = year, y = elder)) +
  geom_point(color = national_color, size = 3) +
  # draw a smoothed spline through the points instead of straight segments
  ggalt::geom_xspline(color = national_color, size = 1.5, spline_shape = -0.5) +
  # put an axis break at each year present in the data
  scale_x_continuous(breaks = as.numeric(levels(line_chart_df$year))) +
  gssthemes()

lollipop chart

A lollipop chart is used to visually represent time series data where the magnitude of occurrence is to be communicated. It shows the relationship between a numeric and a categorical variable, and helps to identify trends or patterns over time. Lollipop charts can be used to display numerical data such as quantities and magnitudes. To make sure that each value is shown above the right year, you can use as.factor() to turn the numeric year into a factor.

Show the code
# Create lollipop plot: a stem from zero up to each value, topped by a point
# whose size scales with the value
line_chart_df %>%
  ggplot(mapping = aes(x = year, y = elder)) +
  geom_segment(aes(xend = year, yend = 0), linewidth = 1) +
  geom_point(aes(size = elder), colour = national_color) +
  geom_text(
    aes(
      label = scales::comma(elder),
      # Values of 500,000 or more get a larger upward offset than smaller
      # ones. Mapping vjust inside aes() ties it to each row of the plotted
      # data instead of to the row order of a separately referenced data frame.
      vjust = ifelse(elder < 500000, -1.5, -4)
    )
  ) +
  scale_size(range = c(2, 24)) +
  labs(
    x = "Census Year",
    y = "Population"
  ) +
  gssthemes() +
  theme(legend.position = "none") +
  scale_y_continuous(
    expand = c(0, 0),
    # due to bubble size scale limit has to be manually set
    limits = c(0, 2500000),
    breaks = scales::extended_breaks(only.loose = TRUE),
    labels = scales::comma
  )

Area charts

Area chart (single category)

An area chart is a graphical representation used to display quantitative data over a period of time or across different categories. A user should use an area chart when they want to emphasize the magnitude of change or cumulative totals, visualize the composition of data, or show trends across multiple series while highlighting their differences.

Show the code
line_chart_df %>%
  # Convert the factor back to numbers row by row. as.character() is needed
  # first: as.numeric() on a factor gives the level codes, and levels(year)
  # only lines up with the rows while every level occurs once, in sorted order.
  mutate(year = as.numeric(as.character(year))) %>%
  ggplot(aes(x = year, y = elder)) +
  geom_line(color = national_color, linewidth = 1.5) +
  # semi-transparent fill under the line
  geom_area(fill = national_color, alpha = .5) +
  # put an axis break at each year present in the data
  scale_x_continuous(breaks = as.numeric(levels(line_chart_df$year))) +
  gssthemes()

Area chart (stacked)

Show the code
# Stacked area chart: one filled area per sex
line_chart_df_by_sex %>%
  ggplot(mapping = aes(x = year, y = number, fill = sex)) +
  geom_area() +
  scale_fill_manual(
    values = c(
      "Male" = male_color,
      "Female" = female_color
    )
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    # the axis limits come from nicelimits, which is defined outside this chunk
    limits = nicelimits,
    breaks = scales::extended_breaks(only.loose = TRUE),
    labels = scales::comma
  ) +
  gssthemes()

Area chart (Proportional)

In a proportional stacked area chart, the total for each year consistently amounts to one hundred percent, with the values of individual groups expressed as percentages. First you need to compute these relative percentages for each year

Show the code
line_chart_df_by_sex %>%
  # share of each sex within its year, so every year adds up to 1
  group_by(year) %>%
  mutate(percentage = number / sum(number)) %>%
  # drop the grouping once the shares are computed so it does not leak into
  # later steps
  ungroup() %>%
  ggplot(aes(x = year, y = percentage, fill = sex)) +
  geom_area(alpha = .7) +
  scale_fill_manual(
    values = c(
      "Male" = male_color,
      "Female" = female_color
    )
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    breaks = scales::extended_breaks(only.loose = TRUE),
    # show the 0-1 shares as percentages on the axis
    labels = scales::percent,
    # the axis limits come from nicelimits, which is defined outside this chunk
    limits = nicelimits
  ) +
  gssthemes()

Ribbon chart

One can use a ribbon chart to draw extra attention to the difference between two lines. To do so, you need two data frames: one in long format (for the lines) and one in wide format (for the ribbon between them).

Show the code
# Create the data frame (wide format): one row per year with one growth
# column for each locality
area_df <- tibble(
  year = c(2010, 2011, 2012, 2013, 2014, 2015),
  `rural growth` = c(2.1, 2.5, 2.8, 3.2, 3.5, 3.9),
  `urban growth` = c(2.8, 3.1, 3.4, 3.7, 4.0, 4.3)
)
Show the code
# Long format for the lines: one row per year and series, with the default
# `name` / `value` columns produced by pivot_longer()
area_df_long <- pivot_longer(area_df, cols = -year)

# Create the plot: the two lines come from the long data, the shaded band
# between them from the wide data
ggplot() +
  geom_line(
    data = area_df_long,
    mapping = aes(x = year, y = value, color = name),
    linewidth = 2
  ) +
  geom_ribbon(
    data = area_df,
    mapping = aes(
      x = year,
      ymin = `rural growth`,
      ymax = `urban growth`
    ),
    alpha = 0.2,
    fill = national_color
  ) +
  scale_color_manual(
    values = c(
      "urban growth" = urban_color,
      "rural growth" = rural_color
    )
  ) +
  labs(
    color = NULL,
    x = "Year",
    y = "Growth (%)"
  ) +
  gssthemes() +
  theme(legend.position = "bottom")

Density Chart

A density plot, also known as a kernel density plot, is a sort of graphical data representation that depicts data distribution over a continuous interval or range of values. It is similar to a histogram in that it displays the estimated probability density function of the data rather than the frequency of data points in each bin.

In data analysis and statistics, density plots are often used to show the shape of the data distribution, including information about the mode, skewness, and outliers. They are especially effective for spotting trends in data that other types of plots, such as scatterplots or boxplots, may miss.

example data (density and histogram)

Show the code
# Fix the random seed so the simulated data, and therefore every plot built
# from it, is the same each time the document is rendered
set.seed(1234)

# Create a data frame with three columns for agriculture, industry, and services,
# each holding 1000 draws from a normal distribution
densitydf <- tibble(
  agriculture = rnorm(1000, mean = 50, sd = 10),
  industry = rnorm(1000, mean = 75, sd = 15),
  services = rnorm(1000, mean = 100, sd = 20)
)


# Convert the data frame to a long format: the column name goes to `sector`
# and the drawn number to the default `value` column
densitydf_long <- densitydf %>%
  pivot_longer(
    cols = everything(),
    names_to = "sector"
  )
Show the code
# Create a density plot using geom_density, with one semi-transparent curve
# per sector filled in that sector's colour
densitydf_long %>%
  ggplot(mapping = aes(x = value, fill = sector)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    values = c(
      industry = industry_color,
      agriculture = agric_color,
      services = services_color
    )
  ) +
  labs(
    title = "Density Plot for Agriculture, Industry, and Services",
    x = "Value",
    y = "Density"
  ) +
  gssthemes()

Histogram

Like a density chart, a histogram is a graphical representation of the distribution of a dataset. It is an estimate of the probability distribution of a continuous variable (quantitative variable) or a discrete variable in certain cases. To construct a histogram, the data is divided into a set of intervals, also known as bins or buckets, and the number of data points falling into each bin is represented by the height of a bar. The bins are usually specified as consecutive, non-overlapping intervals of a variable.

Show the code
# Histogram with the sectors stacked in each bin
densitydf_long %>%
  ggplot(aes(x = value, fill = sector)) +
  # bins is set explicitly (30 is the ggplot2 default) so that ggplot2 does
  # not print its "pick better value" message
  geom_histogram(alpha = 0.5, bins = 30) +
  scale_fill_manual(
    values = c(
      industry = industry_color,
      agriculture = agric_color,
      services = services_color
    )
  ) +
  labs(
    title = "Histogram for Agriculture, Industry, and Services",
    x = "Value",
    y = "Count"
  ) +
  gssthemes()

Histograms (dodged)

Instead of having the bars of the histogram stack you can also choose to have them next to each other.

Show the code
# Histogram with the sectors placed side by side in each bin
densitydf_long %>%
  ggplot(aes(x = value, fill = sector)) +
  # bins is set explicitly (30 is the ggplot2 default) so that ggplot2 does
  # not print its "pick better value" message
  geom_histogram(position = "dodge", bins = 30) +
  scale_fill_manual(
    values = c(
      industry = industry_color,
      agriculture = agric_color,
      services = services_color
    )
  ) +
  labs(
    title = "Histogram for Agriculture, Industry, and Services",
    x = "Value",
    y = "Count"
  ) +
  gssthemes() +
  theme(legend.position = "bottom")

Box plot

Boxplots can be used to display the distribution of a numerical data. They are particularly useful for summarizing the spread and skewness of a distribution, as well as identifying outliers. The box in the plot represents the interquartile range (IQR), which spans from the first quartile (Q1) to the third quartile (Q3) of the data. The median is represented by a horizontal line inside the box. To make a boxplot, you can use the geom_boxplot() function.

Show the code
# One box per sector, filled with that sector's colour
densitydf_long %>%
  ggplot(mapping = aes(x = sector, y = value, fill = sector)) +
  geom_boxplot() +
  scale_fill_manual(
    values = c(
      industry = industry_color,
      agriculture = agric_color,
      services = services_color
    )
  ) +
  labs(
    title = "Boxplot for income in Agriculture, Industry, and Services",
    x = NULL,
    y = "Income"
  ) +
  gssthemes() +
  theme(legend.position = "bottom")

Heatmap

Heatmap is a graphical representation of data that uses colour-coded cells to represent values. Heatmaps are commonly used to visualize the distribution and density of data points within a particular area or to show patterns and correlations in large datasets.

In a heatmap, each cell represents a specific data point or group of data points, and the colour of the cell indicates the value of the data. Heatmaps can be used to visualize changes in data over time, display geographic data, such as population density or weather patterns, highlight outliers or anomalies in a dataset, which may indicate errors or unusual pattern among others

Show the code
# Sample data: changes in data over time.
# One value per sector and quarter, reshaped to long format with the quarter
# name in `Quarter` and the number in the default `value` column.
heatmap_df <- tibble(
  Sector = c("Agriculture", "Manufacturing", "Trade", "Transport", "Services"),
  Q1 = c(3, 4, 5, 6, 7),
  Q2 = c(4, 5, 6, 7, 8),
  Q3 = c(5, 6, 7, 8, 9),
  Q4 = c(6, 7, 8, 9, 10)
) %>%
  pivot_longer(
    cols = -Sector,
    names_to = "Quarter"
  )

# Print the sample data as a styled table inside a scroll box that is 400px high
heatmap_df %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "400px")
Sector Quarter value
Agriculture Q1 3
Agriculture Q2 4
Agriculture Q3 5
Agriculture Q4 6
Manufacturing Q1 4
Manufacturing Q2 5
Manufacturing Q3 6
Manufacturing Q4 7
Trade Q1 5
Trade Q2 6
Trade Q3 7
Trade Q4 8
Transport Q1 6
Transport Q2 7
Transport Q3 8
Transport Q4 9
Services Q1 7
Services Q2 8
Services Q3 9
Services Q4 10
Show the code
# create heatmap using ggplot: one tile per sector and quarter, coloured by
# value along the incidence colour scheme
heatmap_df %>%
  ggplot(mapping = aes(x = Quarter, y = Sector, fill = value)) +
  geom_tile() +
  scale_fill_gradientn(colours = incidence_color_scheme) +
  # NOTE(review): gssthemes() is added after theme_minimal(); if gssthemes()
  # is a complete theme, theme_minimal() has no effect here - confirm
  theme_minimal() +
  labs(
    title = "Economic Activity by Sector",
    x = "Quarter",
    y = "Sector",
    fill = "Value"
  ) +
  gssthemes() +
  theme(legend.position = "bottom")

Proportion Charts

Pie charts

A pie chart is a circular statistical graphic that is divided into slices to represent numerical quantities. Each pie slice represents a certain category, and the size of each slice is directly proportional to the amount it represents. Pie charts are used to show how several categories are spread within a whole.

Pie charts are the most effective for comparing parts of an entire. They do not show changes over time or relationships between variables. When there are many categories or not many differences between the groupings, pie charts might be difficult to read and comprehend.

example data (Pie chart with 2 categories)

Show the code
# Sample data: number of deaths by locality type
df_pie <- data.frame(
  locality = c("Urban", "Rural"),
  deaths = c(4500, 2100)
)


# Print the sample data as a styled table
df_pie %>%
  kable() %>%
  kable_styling()
locality deaths
Urban 4500
Rural 2100
Show the code
# Pie chart: a single stacked bar drawn in polar coordinates, so that each
# locality's share of the bar becomes a slice
df_pie %>%
  ggplot() +
  geom_col(mapping = aes(x = "", y = deaths, fill = locality)) +
  coord_polar(theta = "y") +
  scale_fill_manual(
    values = c(
      "Urban" = urban_color,
      "Rural" = rural_color
    )
  ) +
  gssthemes() +
  # remove axis text, ticks and grid lines
  theme(
    axis.text = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_blank(),
    panel.grid = element_blank()
  ) +
  labs(
    x = NULL,
    y = NULL,
    fill = "Locality"
  )

Pie chart with percentage value labels showing

Show the code
# Pie chart with each slice's share printed on it as a percentage
df_pie %>%
  ggplot(aes(
    x = "",
    y = deaths,
    fill = locality
  )) +
  geom_col() +
  coord_polar(theta = "y") +
  scale_fill_manual(
    values = c(
      "Urban" = urban_color,
      "Rural" = rural_color
    )
  ) +
  geom_text(
    # share of all deaths, formatted as a percentage
    aes(label = scales::percent(deaths / sum(deaths, na.rm = TRUE))),
    color = "white",
    size = 8,
    # place each label in the middle of its slice
    position = position_stack(vjust = 0.5)
  ) +
  gssthemes() +
  # remove axis text, ticks and grid lines
  theme(
    axis.text = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_blank(),
    panel.grid = element_blank()
  ) +
  labs(
    x = NULL,
    y = NULL
  )

Pie chart with group labels showing on slice and with no legend

Show the code
# Pie chart with the locality name and its percentage printed on each slice;
# the legend is hidden because the slices are labelled directly
df_pie %>%
  ggplot(aes(x = "", y = deaths, fill = locality)) +
  geom_col() +
  coord_polar(theta = "y") +
  geom_text(
    aes(label = locality),
    # A fixed colour must be set outside aes(). Inside aes(), "black" is
    # treated as a data value and mapped through the colour scale, so the
    # text is drawn in a default scale colour instead of black.
    color = "black",
    size = 8,
    position = position_stack(vjust = 0.5),
    # shift the name away from the percentage label placed at the same spot
    vjust = -1.5
  ) +
  scale_fill_manual(
    values = c(
      "Urban" = urban_color,
      "Rural" = rural_color
    )
  ) +
  geom_text(
    # share of all deaths, formatted as a percentage
    aes(label = scales::percent(deaths / sum(deaths, na.rm = TRUE))),
    color = "white",
    size = 8,
    position = position_stack(vjust = 0.5)
  ) +
  gssthemes() +
  # remove axis text, ticks, grid lines and the legend
  theme(
    axis.text = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none"
  ) +
  labs(
    x = NULL,
    y = NULL
  )

Show the code
# # Sample data
# df <- data.frame(
#   group = c("A", "B", "C", "D"),
#   value = c(40, 10, 15, 35)
# )
# 
# # Donut chart
# ggplot(df) +
#   geom_col(aes(x = "", y = value, fill = group)) +
#   coord_polar(theta = "y") +
#   theme_void() +
#   xlim(-1.5, 1.5) +
#   annotation_custom(grob = grid::rectGrob(gp = grid::gpar(col = NA, fill = "white")), xmin = -1, xmax = 1, ymin = -1, ymax = 1)+
#   gssthemes()

Waffle chart

A waffle chart, also known as a square pie chart or a mosaic plot, is a type of chart that displays parts of a whole for categorical quantities. It is similar to a pie chart but uses squares instead of wedges to represent the proportions. Each square in the chart represents a fixed quantity and the total number of squares represents the total quantity.

Waffle charts are used in similar situations as pie charts, to show the relative proportions of different categories within a whole. They can be useful when you want to compare parts of a whole or when you want to display multiple pie charts with the same scale in a small space. Waffle charts can also be easier to read and interpret than pie charts when there are many categories or if the differences between the categories are small. To make a waffle chart in R, you can use the waffle() function from the waffle package. This package works with named vectors, so to use it you need to transform your tibble into a named vector.

example data (waffle chart)

Show the code
# Illustrative food counts: one row per food, tagged with its food type.
data_waffle_chart <- tribble(
  ~Type,        ~Food,     ~number,
  "Starch",     "Fufu",    52,
  "Starch",     "Banku",   18,
  "Starch",     "Rice",    10,
  "Fish",       "Tilapia", 6,
  "vegetables", "Tomato",  2,
  "vegetables", "Okra",    2
)

# Render the example data as a styled table.
kable_styling(kable(data_waffle_chart))
Type Food number
Starch Fufu 52
Starch Banku 18
Starch Rice 10
Fish Tilapia 6
vegetables Tomato 2
vegetables Okra 2
Show the code
library(waffle)

# waffle() takes a named numeric vector: the values are the counts and the
# names (here the foods) become the legend entries.
vect_waffle_chart <- setNames(
  data_waffle_chart$number,
  data_waffle_chart$Food
)

# Draw the waffle, then apply the house theme and the neutral palette.
waffle(vect_waffle_chart) +
  gssthemes() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "bottom"
  ) +
  scale_fill_manual(values = neutral_color_scheme) +
  labs(fill = NULL)

Treemap

A treemap is used when you want to visualize hierarchical data or display the composition of a whole using nested rectangles. It is particularly useful for representing large amounts of data with varying proportions in a compact and space-efficient manner. Each rectangle within the treemap represents a category or sub-category, with the size of the rectangle corresponding to the value or magnitude of that specific category. To make a treemap chart in R, you can use the geom_treemap() function from the treemapify package.

Show the code
library(treemapify)

# Treemap of the example food data: one tile per food, sized by `number`,
# grouped and coloured by food `Type`.
data_waffle_chart %>%
  ggplot(aes(area = number, fill = Type, label = Food, subgroup = Type)) +
  geom_treemap(colour = "white") +
  geom_treemap_text(
    grow = TRUE,
    reflow = TRUE,
    colour = "white",
    family = GSS_font
  ) +
  geom_treemap_subgroup_border(colour = "white") +
  gssthemes() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    legend.position = "bottom"
  ) +
  # Exactly one fill scale: ggplot2 keeps only the last fill scale added to a
  # plot, so the three neutral colours are set here and nowhere else.
  scale_fill_manual(values = neutral_color_scheme[c(1, 5, 3)]) +
  labs(fill = "type of food")

Dumbbell chart

Also known as a connected dot plot, a dumbbell chart shows the change in a variable between two different conditions or points in time. It is frequently employed to contrast two connected variables across various categories. Each category is drawn as two dots connected by a line or bar, and each dot reflects the value of one of the variables for that category.

Dumbbell charts are helpful for comparing the range of a variable across various groups or for displaying change over time for multiple groups. To make a dumbbell chart in R, you can use the geom_dumbbell() function from the ggalt package.

Show the code
library(ggalt)

# Dumbbell chart: one row per region, with the `rural` and `urban` values as
# the two end points of each bar.
n_chopbars_df %>%
  ggplot() +
  geom_dumbbell(
    mapping = aes(y = region, x = rural, xend = urban),
    size = 1,
    color = national_color,
    size_x = 5,
    size_xend = 5,
    colour_x = rural_color,
    colour_xend = urban_color,
    dot_guide = FALSE,
    dot_guide_size = 0.25,
    show.legend = TRUE
  ) +
  labs(x = NULL, y = NULL) +
  gssthemes() +
  # This point layer holds only NA coordinates; it appears to exist solely so
  # that the colour scale below has "urban"/"rural" values to build a legend.
  geom_point(
    data = tibble(
      x = NA_integer_,
      y = NA_integer_,
      fill = c("urban", "rural")
    ),
    mapping = aes(x = x, y = y, color = fill)
  ) +
  scale_color_manual(
    name = "",
    values = c(urban = urban_color, rural = rural_color)
  ) +
  theme(
    legend.key = element_rect(fill = NA),
    legend.position = "bottom"
  ) +
  # Enlarge the legend keys to size 5, the same size as the end-point dots.
  guides(colour = guide_legend(override.aes = list(size = 5)))

Sankey Chart

A Sankey chart (also known as a Sankey diagram) is a specific type of flow diagram, where the width of the bands is proportional to the flow quantity. This makes Sankey charts useful for visualizing a flow or transfer of some quantity between different time points. To make a Sankey chart in ggplot, you can use the ggsankey package.

Show the code
library(ggsankey)

# Illustrative labour-force status over three quarters (one row per
# observation), reshaped by make_long() into the x / node / next_x / next_node
# layout that is plotted further down.
sankey_df <- tribble(
  ~Q1,                    ~Q2,                    ~Q3,
  "Labour force",         "Labour force",         "Labour force",
  "Labour force",         "Labour force",         "Outside Labour force",
  "Labour force",         "Outside Labour force", "Labour force",
  "Outside Labour force", "Outside Labour force", "Outside Labour force",
  "Outside Labour force", "Outside Labour force", "Labour force"
) %>%
  make_long(Q1, Q2, Q3)

# Render the long-format data as a styled table.
kable_styling(kable(sankey_df))
x node next_x next_node
Q1 Labour force Q2 Labour force
Q2 Labour force Q3 Labour force
Q3 Labour force NA NA
Q1 Labour force Q2 Labour force
Q2 Labour force Q3 Outside Labour force
Q3 Outside Labour force NA NA
Q1 Labour force Q2 Outside Labour force
Q2 Outside Labour force Q3 Labour force
Q3 Labour force NA NA
Q1 Outside Labour force Q2 Outside Labour force
Q2 Outside Labour force Q3 Outside Labour force
Q3 Outside Labour force NA NA
Q1 Outside Labour force Q2 Outside Labour force
Q2 Outside Labour force Q3 Labour force
Q3 Labour force NA NA
Show the code
# Sankey diagram of the long-format flows: nodes are filled by status and
# labelled with the status name.
ggplot(
  sankey_df,
  aes(
    x = x, next_x = next_x,
    node = node, next_node = next_node,
    fill = factor(node),
    label = node
  )
) +
  # Semi-transparent flows with outlined nodes; this layer adds no legend.
  geom_sankey(flow.alpha = 0.5, node.color = 1, show.legend = FALSE) +
  # Node labels on a white background instead of the mapped fill colour.
  geom_sankey_label(
    size = 3.5,
    color = 1,
    fill = "white",
    family = "century gothic bold"
  ) +
  scale_fill_manual(
    values = c(
      "Outside Labour force" = negative_color,
      "Labour force" = positive_color
    ),
    labels = c(
      "Outside Labour force" = "Outside Labour force",
      "Labour force" = "Labour force"
    )
  ) +
  # Put the stage labels (Q1, Q2, Q3) above the diagram.
  scale_x_discrete(position = "top") +
  gssthemes() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.x = element_blank()
  )