Как посчитать метрики командности (GitLab)/Code
Материал из Поле цифровой дидактики
Только код
library(tidyverse)
library(lubridate)
library(ineq)
library(scales)
library(knitr)
# Commit-pinned raw CSV on GitHub that the whole analysis is built on.
URL <- paste0(
  "https://raw.githubusercontent.com/patarakin/stat-data/",
  "1118a56e7544839d7df91a60df2a25ba577c4dd4/datasets/csv/df_rich_team.csv"
)

# Read the table, parse commit_time as a UTC date-time, then derive the
# calendar date of every commit from it.
df <- read_csv(URL, show_col_types = FALSE) |>
  mutate(commit_time = ymd_hms(commit_time, tz = "UTC")) |>
  mutate(commit_date = as.Date(commit_time))

glimpse(df)
##-- Functions
# Inequality of contributions.
#
# changes_vec: numeric vector of contribution sizes.
# Returns 0 when fewer than two values are given or when they sum to zero;
# otherwise the value computed by ineq::Gini().
gini_contrib <- function(changes_vec) {
  too_few <- length(changes_vec) < 2
  # `||` short-circuits, so the sum is only evaluated for 2+ values.
  if (too_few || sum(changes_vec) == 0) {
    return(0)
  }
  ineq::Gini(changes_vec)
}
# Share of adjacent positions in authors_vec where the element differs from
# the one directly before it.
#
# authors_vec: vector of author identifiers in the order to be compared.
# Returns NA_real_ for fewer than two elements, otherwise a value in [0, 1].
succession_ratio <- function(authors_vec) {
  n <- length(authors_vec)
  if (n < 2) {
    return(NA_real_)
  }
  following <- tail(authors_vec, -1)
  preceding <- head(authors_vec, -1)
  n_switches <- sum(following != preceding)
  n_switches / (n - 1)
}
# Burstiness of a series of timestamps: (sd - mean) / (sd + mean) of the
# strictly positive gaps (in seconds) between consecutive sorted timestamps.
#
# times_vec: vector of timestamps.
# Returns NA_real_ when there are fewer than three timestamps, fewer than two
# positive gaps, or when sd + mean is zero.
burstiness <- function(times_vec) {
  ordered_times <- sort(times_vec)
  if (length(ordered_times) < 3) {
    return(NA_real_)
  }

  gaps <- as.numeric(diff(ordered_times), units = "secs")
  # Zero-length gaps (identical timestamps) are ignored.
  positive_gaps <- gaps[gaps > 0]
  if (length(positive_gaps) < 2) {
    return(NA_real_)
  }

  gap_mean <- mean(positive_gaps)
  gap_sd <- sd(positive_gaps)
  if ((gap_sd + gap_mean) == 0) {
    return(NA_real_)
  }
  (gap_sd - gap_mean) / (gap_sd + gap_mean)
}
# Fraction of author pairs that have at least one date in common.
#
# author_vec: author identifier for every record (any atomic type).
# date_vec:   date of every record, parallel to author_vec.
# Returns 0 when there are fewer than two distinct authors, otherwise
# (number of author pairs sharing a date) / (number of author pairs).
shared_activity_index <- function(author_vec, date_vec) {
  # split() names its groups with the character form of the grouping values.
  # Looking groups up by those character keys keeps numeric author ids from
  # being interpreted as list positions.
  author_keys <- as.character(author_vec)
  authors <- unique(author_keys)
  if (length(authors) < 2) {
    return(0)
  }

  dates_by_author <- split(date_vec, author_keys)
  pairs <- combn(authors, 2, simplify = FALSE)
  shares_a_date <- vapply(
    pairs,
    function(p) {
      common <- intersect(dates_by_author[[p[1]]], dates_by_author[[p[2]]])
      length(common) > 0
    },
    logical(1)
  )
  sum(shares_a_date) / length(pairs)
}
# Mean time, in minutes, between consecutive records whose authors differ.
#
# authors_vec: author identifier of every record, in the order to be compared.
# times_vec:   timestamp of every record, parallel to authors_vec.
# Returns NA_real_ when there are fewer than two records or when no pair of
# consecutive records has different authors.
mean_response_minutes <- function(authors_vec, times_vec) {
  n <- length(authors_vec)
  if (n < 2) {
    return(NA_real_)
  }

  # Compare every record with its predecessor in one vectorised step instead
  # of growing a result vector inside a loop.
  author_changed <- authors_vec[-1] != authors_vec[-n]
  gaps_min <- as.numeric(
    difftime(times_vec[-1], times_vec[-n], units = "mins")
  )

  # which() keeps only definite changes: a comparison involving a missing
  # author is NA and is skipped rather than aborting the computation.
  intervals <- gaps_min[which(author_changed)]
  if (length(intervals) == 0) {
    return(NA_real_)
  }
  mean(intervals)
}
#--------------------------
# Sum of total_changes for every (project, author) combination.
author_contrib <- df |>
  group_by(project_id, author_anon) |>
  summarise(
    author_changes = sum(total_changes),
    .groups = "drop"
  )

# One row per project: gini_contrib() applied to the per-author totals.
gini_by_project <- author_contrib |>
  group_by(project_id) |>
  summarise(gini_contribution = gini_contrib(author_changes), .groups = "drop")
# One row of metrics per project. Rows are sorted by commit_time within each
# project first, because succession_ratio() and mean_response_minutes()
# compare neighbouring rows.
project_metrics <- df |>
  arrange(project_id, commit_time) |>
  group_by(project_id) |>
  summarise(
    # Inputs
    n_authors = n_distinct(author_anon),
    n_commits = n(),
    total_changes = sum(total_changes),
    duration_days = as.numeric(
      difftime(max(commit_time), min(commit_time), units = "days")
    ),

    # Action processes
    succession_ratio = succession_ratio(author_anon),
    burstiness = burstiness(commit_time),
    # Uses the span computed as duration_days just above, floored at
    # 0.01 days so that a zero-length project does not divide by zero.
    commit_rate_per_day = n() / pmax(duration_days, 0.01),

    # Emergent states
    shared_activity_index =
      shared_activity_index(author_anon, commit_date),

    # Output: maturity of the work.
    # NOTE: total_changes was redefined earlier in this summarise(), so it
    # refers to the per-project sum here; the denominator is floored at 1.
    refactoring_ratio = sum(deletions) / pmax(sum(total_changes), 1),

    # Stigmergy: reaction speed
    mean_response_min =
      mean_response_minutes(author_anon, commit_time),

    .groups = "drop"
  ) |>
  left_join(gini_by_project, by = "project_id")

glimpse(project_metrics)
# Нормируем ключевые метрики и агрегируем их в «индекс командности».
# Min-max rescaling of a numeric vector to [0, 1].
#
# x: numeric vector; NA values are ignored when finding the range and stay NA
#    in the result.
# Returns a vector of the same length as x:
#   * all NA_real_ when x has no non-missing value,
#   * all 0 when every non-missing value is identical (zero range),
#   * (x - min) / (max - min) otherwise.
minmax_norm <- function(x) {
  # range() on an all-NA vector would warn and return c(Inf, -Inf).
  if (all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }

  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]
  if (span == 0) {
    return(rep(0, length(x)))
  }
  # Subtract the minimum only. Subtracting the whole c(min, max) vector would
  # recycle it over x and shift every second element by the maximum instead.
  (x - rng[1]) / span
}
# Rescale the three components with minmax_norm(), average them into
# teamwork_index and cut the index into four equally sized tiers.
project_metrics <- project_metrics |>
  mutate(
    # 1 - Gini, so that a more even contribution gives a higher score.
    balance_score = minmax_norm(1 - gini_contribution),
    stigmergy_score = minmax_norm(succession_ratio),
    sync_score = minmax_norm(shared_activity_index)
  ) |>
  mutate(
    teamwork_index = (balance_score + stigmergy_score + sync_score) / 3
  ) |>
  mutate(
    teamwork_tier = factor(
      ntile(teamwork_index, 4),
      levels = 1:4,
      labels = c("Q1_low", "Q2", "Q3", "Q4_high")
    )
  )

glimpse(project_metrics)
# Average project characteristics within every teamwork tier.
tier_summary <- project_metrics |>
  group_by(teamwork_tier) |>
  summarise(
    n_projects = n(),
    avg_authors = mean(n_authors),
    avg_commits = mean(n_commits),
    avg_gini = mean(gini_contribution),
    avg_succession = mean(succession_ratio),
    avg_shared = mean(shared_activity_index),
    avg_refactoring = mean(refactoring_ratio),
    # mean_response_minutes() can return NA, hence na.rm here.
    median_resp_min = median(mean_response_min, na.rm = TRUE),
    .groups = "drop"
  )

# Render the summary as a table with three decimal places.
tier_summary |>
  kable(
    digits = 3,
    caption = "Характеристики проектов по квартилям командности"
  )

glimpse(tier_summary)
#---------------
# Box plot of succession_ratio for every teamwork tier; the white diamond
# (shape 18) marks the mean of each tier.
project_metrics |>
  ggplot(
    aes(x = teamwork_tier, y = succession_ratio, fill = teamwork_tier)
  ) +
  geom_boxplot(alpha = 0.7, outlier.size = 0.8, outlier.alpha = 0.4) +
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 18,
    size = 3,
    colour = "white"
  ) +
  scale_fill_manual(
    values = c("#c0392b", "#e67e22", "#2980b9", "#27ae60")
  ) +
  labs(
    title = "Стигмергический обмен следами по уровням командности",
    subtitle = "Succession ratio = доля коммитов, где следующий автор — другой участник",
    x = "Уровень командности (IMOI–Stigmergy)",
    y = "Succession ratio"
  ) +
  theme_minimal(base_size = 13) +
  # The fill legend would only repeat the x axis.
  theme(legend.position = "none")
#---- Components of the teamwork index
# Mean of every normalised component per tier, reshaped to long format
# (one row per tier and component) for plotting.
components_long <- project_metrics |>
  group_by(teamwork_tier) |>
  summarise(
    `Баланс участия` = mean(balance_score, na.rm = TRUE),
    `Stigmergy (след)` = mean(stigmergy_score, na.rm = TRUE),
    `Синхронность` = mean(sync_score, na.rm = TRUE),
    .groups = "drop"
  ) |>
  pivot_longer(
    cols = -teamwork_tier,
    names_to = "component",
    values_to = "value"
  )

# Grouped bar chart: tiers on the x axis, one bar per component.
components_long |>
  ggplot(aes(x = teamwork_tier, y = value, fill = component)) +
  geom_col(position = "dodge", alpha = 0.85) +
  scale_fill_manual(values = c("#2980b9", "#27ae60", "#e67e22")) +
  labs(
    title = "Компоненты индекса командности по квартилям",
    subtitle = "IMOI–Stigmergy: баланс участия, стигмергия, синхронность",
    x = "Уровень командности",
    y = "Нормированное значение (0–1)",
    fill = "Компонент"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")
# -- Radar chart of the tier profiles
library(fmsb)

# Mean normalised component scores per teamwork tier. Projects without a tier
# (teamwork_tier is NA, e.g. when a component score is NA) are dropped so that
# every plotted row has a label and a colour.
radar_data <- project_metrics |>
  filter(!is.na(teamwork_tier)) |>
  group_by(teamwork_tier) |>
  summarise(
    balance = mean(balance_score, na.rm = TRUE),
    stigmergy = mean(stigmergy_score, na.rm = TRUE),
    sync = mean(sync_score, na.rm = TRUE),
    .groups = "drop"
  )

# radarchart() reads the axis maximum from row 1 and the minimum from row 2.
# The frame is built as a plain data.frame: setting row names on a tibble is
# deprecated, and radarchart() is documented for data frames.
radar_df <- rbind(
  data.frame(balance = c(1, 0), stigmergy = c(1, 0), sync = c(1, 0)),
  as.data.frame(radar_data[, c("balance", "stigmergy", "sync")])
)
tier_labels <- as.character(radar_data$teamwork_tier)
rownames(radar_df) <- c("max", "min", tier_labels)

# One colour per tier level, picked by the tier's factor code so that chart
# and legend stay aligned even if a tier is absent.
radar_cols <- c("red", "orange", "blue", "darkgreen")[
  as.integer(radar_data$teamwork_tier)
]

radarchart(
  radar_df,
  axistype = 1,
  pcol = radar_cols,
  plwd = 2,
  plty = 1,
  title = "Профили командности IMOI–Stigmergy по квартилям"
)
legend(
  "topright",
  legend = tier_labels,
  col = radar_cols,
  lwd = 2,
  bty = "n"
)
#------------------- Contribution balance vs stigmergy
# Keep projects where both plotted metrics are available and give the tiers
# display labels with spaces instead of underscores.
plot_data <- project_metrics |>
  filter(!is.na(gini_contribution)) |>
  filter(!is.na(succession_ratio)) |>
  mutate(
    tier = factor(
      teamwork_tier,
      levels = c("Q1_low", "Q2", "Q3", "Q4_high"),
      labels = c("Q1 low", "Q2", "Q3", "Q4 high")
    )
  )

# Scatter of Gini vs succession ratio coloured by tier, with a single LOESS
# trend over all points and dashed vertical lines at Gini 0.20 and 0.50.
plot_data |>
  ggplot(
    aes(x = gini_contribution, y = succession_ratio, color = tier)
  ) +
  geom_point(alpha = 0.35, size = 1.2) +
  # group = 1 fits one curve through all points rather than one per tier.
  geom_smooth(
    aes(group = 1),
    method = "loess",
    formula = y ~ x,
    se = TRUE,
    color = "black",
    linewidth = 1.2,
    linetype = "solid"
  ) +
  geom_vline(
    xintercept = 0.20,
    linetype = "dashed",
    color = "steelblue",
    linewidth = 0.8
  ) +
  geom_vline(
    xintercept = 0.50,
    linetype = "dashed",
    color = "tomato",
    linewidth = 0.8
  ) +
  # Zone captions placed near the top of the panel.
  annotate(
    "text",
    x = 0.10, y = 0.97,
    label = "Zone A\nequal",
    color = "steelblue",
    size = 3.2,
    fontface = "italic"
  ) +
  annotate(
    "text",
    x = 0.34, y = 0.97,
    label = "Zone B\noptimal",
    color = "grey20",
    size = 3.2,
    fontface = "bold"
  ) +
  annotate(
    "text",
    x = 0.70, y = 0.97,
    label = "Zone C\ndominance",
    color = "tomato",
    size = 3.2,
    fontface = "italic"
  ) +
  scale_color_manual(
    values = c(
      "Q1 low" = "#c0392b",
      "Q2" = "#e67e22",
      "Q3" = "#2980b9",
      "Q4 high" = "#27ae60"
    ),
    name = "Teamwork tier"
  ) +
  labs(
    title = "Contribution balance vs Stigmergic exchange",
    subtitle = "Gini coefficient vs Succession ratio | N ≈ 3,400 GitLab-проектов",
    x = "Gini of contribution (0 = equal, 1 = one author does all)",
    y = "Succession ratio (fraction of commits: next author != previous)",
    caption = "Black line = LOESS trend; dashed lines = zone boundaries"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(color = "grey40"),
    legend.position = "right"
  )
