Как представить графы совместной деятельности в R
Материал из Поле цифровой дидактики
## Урок: разные представления месячной сети авторов GitLab-проектов
library(dplyr)
library(lubridate)
library(igraph)
library(tidygraph)
library(ggraph)
URL <- paste0(
"https://raw.githubusercontent.com/patarakin/stat-data/",
"1118a56e7544839d7df91a60df2a25ba577c4dd4/datasets/csv/df_rich_team.csv"
)
df <- readr::read_csv(URL, show_col_types = FALSE) |>
mutate(
commit_time = ymd_hms(commit_time, tz = "UTC"),
commit_date = as.Date(commit_time)
)
glimpse(df)
## 1. Добавляем месяц и функцию для сети авторов за месяц
df_month <- df %>%
mutate(month = floor_date(commit_time, unit = "month"))
glimpse(df_month)
author_network_for_month <- function(data_month) {
edges_ap <- data_month %>%
distinct(author_anon, project_id)
g_bip <- graph_from_data_frame(edges_ap, directed = FALSE)
V(g_bip)$type <- V(g_bip)$name %in% edges_ap$author_anon
proj <- bipartite_projection(g_bip, multiplicity = TRUE)
g_auth <- proj[[2]]
g_auth
}
## 2. Статистика по всем месяцам (для выбора интересных периодов)
months_vec <- sort(unique(df_month$month))
month_stats <- lapply(months_vec, function(m) {
g_m <- df_month %>%
filter(month == m) %>%
author_network_for_month()
comp <- components(g_m)
data.frame(
month = m,
n_authors = vcount(g_m),
n_edges = ecount(g_m),
n_components = comp$no,
max_component_sz = max(comp$csize)
)
}) |> bind_rows()
month_stats
## Выбираем одну дату для примеров (при необходимости заменить)
target_month <- month_stats$month[which.max(month_stats$n_authors)]
target_month
g_target <- df_month %>%
filter(month == target_month) %>%
author_network_for_month()
g_target
## 3. Базовый plot() igraph
plot(
g_target,
vertex.size = 4,
vertex.label = NA,
edge.width = pmin(E(g_target)$weight, 5),
edge.color = "grey80",
layout = layout_with_fr(g_target),
main = paste("Сеть авторов за", target_month, "(layout_with_fr)")
)
## 4. Несколько разных layout’ов igraph
par(mfrow = c(2, 2), mar = c(1, 1, 3, 1))
plot(g_target,
layout = layout_with_fr(g_target),
vertex.size = 3, vertex.label = NA,
main = "Fruchterman–Reingold")
plot(g_target,
layout = layout_with_kk(g_target),
vertex.size = 3, vertex.label = NA,
main = "Kamada–Kawai")
plot(g_target,
layout = layout_in_circle(g_target),
vertex.size = 3, vertex.label = NA,
main = "Circle")
plot(g_target,
layout = layout_with_mds(g_target),
vertex.size = 3, vertex.label = NA,
main = "MDS")
par(mfrow = c(1, 1))
## 5. Перевод в tidygraph + ggraph (force-directed)
g_tbl <- as_tbl_graph(g_target) %>%
activate(nodes) %>%
mutate(
deg = centrality_degree()
)
set.seed(42)
ggraph(g_tbl, layout = "fr") +
geom_edge_link(aes(width = pmin(weight, 5)),
alpha = 0.15, colour = "grey70") +
geom_node_point(aes(size = deg),
colour = "steelblue") +
scale_edge_width(range = c(0.2, 1.5)) +
scale_size(range = c(1, 6)) +
labs(
title = paste("ggraph::layout = 'fr' |", target_month)
) +
theme_graph()
## 6. ggraph: кластеризация и раскраска по сообществам
cl <- igraph::cluster_louvain(g_target)
membership_vec <- igraph::membership(cl)
g_tbl <- g_tbl %>%
activate(nodes) %>%
mutate(
community = factor(membership_vec)
)
set.seed(43)
ggraph(g_tbl, layout = "fr") +
geom_edge_link(alpha = 0.1, colour = "grey80") +
geom_node_point(aes(color = community, size = deg)) +
scale_size(range = c(1, 5)) +
labs(
title = paste("Месячные подкоманды (Louvain communities) |", target_month)
) +
theme_graph()
## 7. ggraph: круговая диаграмма по степеням
set.seed(44)
ggraph(g_tbl, layout = "linear") +
geom_edge_arc(alpha = 0.1, colour = "grey70") +
geom_node_point(aes(size = deg, color = community)) +
coord_fixed() +
labs(
title = paste("Круговое представление сети авторов |", target_month)
) +
theme_graph()
## 8. ggraph: фильтрация по «ядру» сети и более плотное представление
g_core <- g_tbl %>%
activate(nodes) %>%
filter(deg >= quantile(deg, 0.75, na.rm = TRUE))
set.seed(45)
ggraph(g_core, layout = "fr") +
geom_edge_link(alpha = 0.2, colour = "grey70") +
geom_node_point(aes(size = deg, color = community)) +
scale_size(range = c(3, 8)) +
labs(
title = paste("Ядро месячной сети авторов (top 25% по degree) |", target_month)
) +
theme_graph()
