Teachers' Employment Allocations by LGA

Masumbuko Semba

11.08.2022

Tilemap

# Tile map built from `district.tb`: tiles are positioned by lon/lat, named
# by district and grouped by zone. Each tile is labelled with the point's
# `code` field, and the tooltip shows the district name and `region_nam`.
hchart(
  district.tb,
  type = "tilemap",
  hcaes(x = lon, y = lat, name = district, group = zone)
) %>%
  hc_chart(type = "tilemap") %>%
  hc_plotOptions(
    series = list(
      dataLabels = list(
        style = list(textOutline = FALSE),
        color = "white",
        format = "{point.code}",
        enabled = TRUE
      )
    )
  ) %>%
  hc_tooltip(
    pointFormat = "<b>{point.name}</b> is in <b>{point.region_nam}</b>",
    headerFormat = ""
  ) %>%
  # Neither axis is drawn.
  hc_xAxis(visible = FALSE) %>%
  hc_yAxis(visible = FALSE) %>%
  hc_size(width = 600, height = 800)

packedbubble

A bubble chart requires three dimensions of data: the x-value and y-value to position the bubble along the value axes, and a third value for its volume. Packed bubble charts have a simpler data structure; a flat, one-dimensional array of volumes is sufficient. The bubble's x/y position is calculated automatically by an algorithm that packs the bubbles into a cluster. The series data point configuration supports setting colors and label values. A drag-and-drop feature also lets the user quickly move a bubble from one series to another and see how the relations change.

# Split `halmashauri` on single spaces: the first piece becomes `district`,
# the next two pieces are glued back together as `code`, and `code` is then
# translated to a short council-type abbreviation in `lga`.
# Any `code` not listed in the lookup below yields NA.
walimu.lga <- walimu.clean %>%
  separate(col = halmashauri, into = c("district", "b", "c"), sep = " ") %>%
  unite(col = code, b:c, sep = " ") %>%
  mutate(
    lga = unname(
      c(
        "District Council" = "DC",
        "Municipal Council" = "MC",
        "City Council" = "CC",
        "Town Council" = "TC",
        # Two-word place names leave the second name word in `code`.
        "Mikindani Municipal" = "MC",
        "Ujiji Municipal" = "MC"
      )[code]
    )
  )

  
# Number of rows in `walimu.lga` for each district / council-type pair,
# stored in column `n`.
# `count()` is given the columns directly so the result is an ungrouped
# table; `group_by() %>% count()` would hand a still-grouped table to every
# later step.
walimu.lga.freq <- walimu.lga %>%
  count(district, lga)

# Attach the per-district counts to `district.tb`, keep the columns the
# bubble chart needs, and derive `code` as the first three characters of
# the district name in upper case.
# NOTE(review): the join has no `by`, so it matches on every column name the
# two tables share — confirm that this is only `district`.
district.walimu <- district.tb %>%
  left_join(walimu.lga.freq) %>%
  select(region_nam, zone, n, district) %>%
  mutate(code = str_to_upper(str_sub(district, 1, 3)))

# Base packed bubble chart: one bubble per row, sized by `n`, named by
# district and grouped by zone.
hc <- hchart(
  district.walimu,
  type = "packedbubble",
  hcaes(name = district, value = n, group = zone)
)

# 95th percentile of `n` (missing values ignored), as a plain unnamed number.
q95 <- quantile(district.walimu$n, probs = 0.95, na.rm = TRUE, names = FALSE)

# Final packed bubble chart: add a tooltip and configure layout and labels.
hc %>%
  hc_tooltip(
    pointFormat = "<b>{point.name}:</b> {point.n}",
    useHTML = TRUE
  ) %>%
  hc_plotOptions(
    packedbubble = list(
      zMin = 0,
      maxSize = "150%",
      layoutAlgorithm = list(
        splitSeries = TRUE, # TRUE to group points
        gravitationalConstant = 0.05,
        parentNodeLimit = TRUE,
        dragBetweenSeries = TRUE,
        seriesInteraction = TRUE
      ),
      dataLabels = list(
        enabled = TRUE,
        format = "{point.code}",
        # Only points whose `y` exceeds `q95` get a label.
        filter = list(property = "y", operator = ">", value = q95),
        style = list(
          fontWeight = "normal",
          textOutline = "none",
          color = "black"
        )
      )
    )
  )

Sankey

A Sankey diagram is a visualization used to depict a flow from one set of values to another. The things being connected are called nodes and the connections are called links. Sankey diagrams can also visualize energy accounts, material flow accounts on a regional or national level, and cost breakdowns.[1] They are often used in the visualization of material flow analysis.

Sankey diagrams emphasize the major transfers or flows within a system. They help locate the most important contributions to a flow. They often show conserved quantities within defined system boundaries.

   # Link table for the Sankey diagram: number of rows in `walimu.clean` per
   # (kiwango_cha_elimu, jinsi) pair. `jinsi` is the flow source and
   # `kiwango_cha_elimu` the target; only pairs with more than 100 rows stay.
   quest.tb <- walimu.clean %>%
     group_by(kiwango_cha_elimu, jinsi) %>%
     summarise(value = n(), .groups = "drop") %>%
     filter(value > 100) %>%
     rename(target = kiwango_cha_elimu, source = jinsi) %>%
     as.data.frame()
        
    
    # Node table: one row per distinct value that appears as a source or a
    # target in `quest.tb`, in a single column called `name`.
    # (An earlier base-R version of this table was computed and immediately
    # overwritten; only this definition is kept.)
    nodes <- quest.tb %>%
      select(source, target) %>%
      pivot_longer(cols = c(source, target)) %>%
      distinct(value) %>%
      rename(name = value) %>%
      as.data.frame()

    # networkD3 expects links to refer to nodes by zero-based row index, not
    # by name. `match()` is one-based, hence the `- 1`.
    quest.tb$IDsource <- match(quest.tb$source, nodes$name) - 1
    quest.tb$IDtarget <- match(quest.tb$target, nodes$name) - 1
    
    
    # Draw the Sankey diagram from the link table (`quest.tb`) and the node
    # table (`nodes`); links are grouped by their `source` column.
    networkD3::sankeyNetwork(
      Links = quest.tb,
      Nodes = nodes,
      Source = "IDsource",
      Target = "IDtarget",
      Value = "value",
      NodeID = "name",
      LinkGroup = "source",
      fontFamily = "Myriad Pro",
      fontSize = 14,
      nodeWidth = 30,
      nodePadding = 30,
      iterations = 5,
      sinksRight = FALSE,
      width = 1000,
      height = 400
    )

Chord diagram

A chord diagram represents flows or connections between several entities (called nodes). Each entity is represented by a fragment on the outer part of the circular layout, and arcs are drawn between the entities. The size of each arc is proportional to the importance of the flow. In this section I discuss the transfer of public servants between regions of the country. I use a chord diagram, which is visually appealing and presents the insight in a clearer form.

# Transfer records: first sheet of the workbook, with column names cleaned
# by janitor.
tamisemi <- janitor::clean_names(
  readxl::read_excel("uhamisho_data.xlsx")
)

# Split the origin (`anakotoka`) and destination (`anakoenda`) columns into
# three pieces each: council type, a middle piece (`bb`) and the LGA name.
# The destination columns are then moved next to the origin columns, and
# misspelt council types are normalised in both type columns.
tam.clean <- tamisemi %>%
  separate(col = anakotoka, into = c("lga_toka", "bb", "lga_toka_name")) %>%
  separate(col = anakoenda, into = c("lga_enda", "bb", "lga_enda_name")) %>%
  relocate(lga_enda, lga_enda_name, .after = lga_toka_name) %>%
  mutate(
    across(
      c(lga_toka, lga_enda),
      function(kind) {
        case_when(
          kind == "Manipaa" ~ "Manispaa",
          kind %in% c("Wiaya", "WIlaya") ~ "Wilaya",
          TRUE ~ kind
        )
      }
    )
  )

# LGA lookup from the second sheet of the workbook: drop the second column
# and reduce `halmashauri` to its first space-separated piece, stored as
# `lga`. The second piece is discarded.
mikoa <- readxl::read_excel("uhamisho_data.xlsx", sheet = 2) %>%
  janitor::clean_names() %>%
  dplyr::select(-2) %>%
  separate(col = halmashauri, into = c("lga", NA), sep = " ")

# One lookup row per LGA name. If an LGA name occurred more than once in
# `mikoa`, the joins below would duplicate transfer rows, `toka` and `enda`
# would end up with different lengths, and pairing them by row position
# would mix up origins and destinations.
mikoa.lookup <- mikoa %>%
  distinct(lga, .keep_all = TRUE)

# Origin side: LGA name of each transfer plus the matching lookup columns.
toka <- tam.clean %>%
  dplyr::select(lga_toka_name) %>%
  left_join(mikoa.lookup, by = c("lga_toka_name" = "lga"))

# Destination side, in the same row order as `toka`. The lookup column
# `mikoa` is renamed to `mkoa` and copied to `region` so it does not clash
# with the origin column when the two tables are bound side by side.
# No row is dropped here: row i of `enda` must describe the same transfer
# as row i of `toka`.
enda <- tam.clean %>%
  dplyr::select(lga_enda_name) %>%
  left_join(mikoa.lookup, by = c("lga_enda_name" = "lga")) %>%
  rename(mkoa = mikoa) %>%
  mutate(region = mkoa)

Looking at all regions at once hides crucial information, as the chord diagram of the full region-to-region transfer matrix below shows.

# Put origin and destination columns side by side (paired by row position),
# count the rows for each (mikoa, region) pair, and spread the counts so
# that each `region` value becomes its own column.
mkoa.mkoa <- bind_cols(toka, enda) %>%
  group_by(mikoa, region) %>%
  count() %>%
  datawizard::data_to_wide(names_from = "region", values_from = "n")


# Matrix form of the wide table: rows with a missing `mikoa` are dropped and
# the remaining `mikoa` values become the row names.
mkoa.matrix <- mkoa.mkoa %>%
  filter(!is.na(mikoa)) %>%
  column_to_rownames(var = "mikoa") %>%
  as.matrix()


# Bipartite chord diagram of the full matrix, with group names shown and
# ticks hidden.
chorddiag(
  mkoa.matrix,
  type = "bipartite",
  showGroupnames = TRUE,
  groupnameFontsize = 14,
  groupnamePadding = 10,
  groupColors = hcl.colors(n = 30, palette = "Berlin"),
  showTicks = FALSE,
  tickInterval = 2,
  margin = 90
)

Let us single out one region. In this case I choose the Mara region and look at its transfers on their own.

# Matrix holding only the row whose `mikoa` is "Mara"; `mikoa` becomes the
# row name.
mtoko <- mkoa.mkoa %>%
  filter(mikoa == "Mara") %>%
  column_to_rownames(var = "mikoa") %>%
  as.matrix()


# Bipartite chord diagram for the single-region matrix, with group names
# shown and ticks hidden.
chorddiag(
  mtoko,
  type = "bipartite",
  showGroupnames = TRUE,
  groupnameFontsize = 14,
  groupnamePadding = 10,
  groupColors = hcl.colors(n = 27, palette = "Zissou 1"),
  showTicks = FALSE,
  tickInterval = 2,
  margin = 90
)