Как посчитать метрики командности (GitLab)/Code

Материал из Поле цифровой дидактики

Только код

library(tidyverse)
library(lubridate)
library(ineq)
library(scales)
library(knitr)

# Raw GitHub location of the CSV; the long hexadecimal path segment
# presumably pins one specific revision of the data repository.
URL <- paste0(
  "https://raw.githubusercontent.com/patarakin/stat-data/",
  "1118a56e7544839d7df91a60df2a25ba577c4dd4/datasets/csv/df_rich_team.csv"
)

# Read the table without the column-type report.
df <- read_csv(URL, show_col_types = FALSE)

# Parse the commit timestamp as UTC, then derive the calendar date from it.
df <- df |>
  mutate(commit_time = ymd_hms(commit_time, tz = "UTC")) |>
  mutate(commit_date = as.Date(commit_time))

glimpse(df)

##-- Functions
# Gini coefficient of a vector of per-author contribution sizes.
#
# Args:
#   changes_vec: numeric vector of contribution amounts.
#
# Returns:
#   0 when there are fewer than two values or when the values sum to zero;
#   otherwise the value computed by ineq::Gini().
gini_contrib <- function(changes_vec) {
  # Fewer than two contributors: inequality is reported as 0.
  if (length(changes_vec) < 2) {
    return(0)
  }
  # Nothing was contributed at all: also reported as 0.
  if (sum(changes_vec) == 0) {
    return(0)
  }
  ineq::Gini(changes_vec)
}

# Fraction of consecutive element pairs whose authors differ.
#
# Args:
#   authors_vec: vector of author identifiers in the order to be compared
#     (the caller is expected to have sorted it).
#
# Returns:
#   Number of adjacent pairs with different authors divided by the number
#   of adjacent pairs; NA_real_ when fewer than two elements are given.
succession_ratio <- function(authors_vec) {
  n <- length(authors_vec)
  if (n < 2) {
    return(NA_real_)
  }
  following <- tail(authors_vec, -1)  # elements 2..n
  preceding <- head(authors_vec, -1)  # elements 1..n-1
  handoffs <- sum(following != preceding)
  handoffs / (n - 1)
}

# Burstiness of a series of time stamps: (sd - mean) / (sd + mean) of the
# strictly positive gaps between consecutive sorted time stamps.
#
# Args:
#   times_vec: vector of time stamps (any order).
#
# Returns:
#   The burstiness value, or NA_real_ when there are fewer than three time
#   stamps, fewer than two positive gaps, or sd + mean equals zero.
burstiness <- function(times_vec) {
  ordered_times <- sort(times_vec)
  if (length(ordered_times) <= 2) {
    return(NA_real_)
  }

  # Gaps between neighbours expressed in seconds; zero gaps are discarded.
  gaps <- as.numeric(diff(ordered_times), units = "secs")
  positive_gaps <- gaps[gaps > 0]
  if (length(positive_gaps) <= 1) {
    return(NA_real_)
  }

  gap_mean <- mean(positive_gaps)
  gap_sd <- sd(positive_gaps)
  denominator <- gap_sd + gap_mean
  if (denominator == 0) {
    return(NA_real_)
  }
  (gap_sd - gap_mean) / denominator
}

# Share of author pairs that were active on at least one common date.
#
# Args:
#   author_vec: vector of author identifiers, one per record (character,
#     numeric or factor).
#   date_vec:   vector of the same length holding the date of each record.
#
# Returns:
#   The fraction of unordered author pairs that have at least one date in
#   common; 0 when there are fewer than two distinct authors.
shared_activity_index <- function(author_vec, date_vec) {
  # split() names its groups with the character form of the grouping values,
  # so groups are looked up by character key. Indexing the list with raw
  # numeric or factor identifiers would select by position, not by author.
  author_key <- as.character(author_vec)
  authors <- unique(author_key)
  n <- length(authors)
  if (n < 2) return(0)

  dates_by_author <- split(date_vec, author_key)
  pairs <- combn(authors, 2, simplify = FALSE)

  # TRUE for every pair whose two date sets intersect.
  overlap_count <- sum(vapply(pairs, function(p) {
    length(intersect(dates_by_author[[p[1]]],
                     dates_by_author[[p[2]]])) > 0
  }, logical(1)))

  overlap_count / length(pairs)
}

# Mean time, in minutes, between consecutive records made by different
# authors.
#
# Args:
#   authors_vec: vector of author identifiers in the order to be compared
#     (the caller is expected to have sorted it by time).
#   times_vec:   date-time vector of the same length and order.
#
# Returns:
#   The mean gap in minutes over all adjacent pairs whose authors differ;
#   NA_real_ when there are fewer than two records or no such pair.
mean_response_minutes <- function(authors_vec, times_vec) {
  n <- length(authors_vec)
  if (n < 2) return(NA_real_)

  # Position i refers to the pair (i, i + 1). which() keeps only pairs where
  # the comparison is TRUE, so a missing author id (NA comparison) is skipped
  # instead of aborting the computation.
  handoff <- which(authors_vec[-1] != authors_vec[-n])
  if (length(handoff) == 0) return(NA_real_)

  # All gaps are computed in one vectorised call rather than by growing a
  # result vector inside a loop.
  gaps <- as.numeric(difftime(times_vec[handoff + 1],
                              times_vec[handoff],
                              units = "mins"))
  mean(gaps)
}

#--------------------------
# Total size of changes per (project, author) pair: one row for every
# combination of project_id and author_anon found in df.
author_contrib <- df |>
  group_by(project_id, author_anon) |>
  summarise(author_changes = sum(total_changes), .groups = "drop")

# One row per project: inequality of the per-author totals computed above,
# as returned by gini_contrib().
gini_by_project <- author_contrib |>
  group_by(project_id) |>
  summarise(
    gini_contribution = gini_contrib(author_changes),
    .groups = "drop"
  )

# One row of metrics per project. Rows are sorted by project and commit time
# first because succession_ratio() and mean_response_minutes() compare
# neighbouring rows.
project_metrics <- df |>
  arrange(project_id, commit_time) |>
  group_by(project_id) |>
  summarise(
    # Inputs
    n_authors     = n_distinct(author_anon),
    n_commits     = n(),
    total_changes = sum(total_changes),
    duration_days = as.numeric(
      difftime(max(commit_time), min(commit_time), units = "days")
    ),

    # Action processes
    succession_ratio = succession_ratio(author_anon),
    burstiness       = burstiness(commit_time),
    # Reuses the two summaries computed above; the 0.01-day floor avoids
    # division by zero when all commits share one time stamp.
    commit_rate_per_day = n_commits / pmax(duration_days, 0.01),

    # Emergent states
    shared_activity_index =
      shared_activity_index(author_anon, commit_date),

    # Output: maturity of the work.
    # From here on `total_changes` is the per-project total defined above
    # (summarise() makes new summaries visible to later expressions).
    refactoring_ratio = sum(deletions) / pmax(total_changes, 1),

    # Stigmergy: speed of reaction
    mean_response_min =
      mean_response_minutes(author_anon, commit_time),

    .groups = "drop"
  ) |>
  left_join(gini_by_project, by = "project_id")

glimpse(project_metrics)

# Нормируем ключевые метрики и агрегируем их в «индекс командности».

# Rescale a numeric vector linearly so that its minimum maps to 0 and its
# maximum maps to 1.
#
# Args:
#   x: numeric vector; NA values are ignored when finding the range and stay
#      NA in the result.
#
# Returns:
#   A numeric vector of the same length as x. All zeros when every non-NA
#   value is identical; all NA when x has no non-NA value.
minmax_norm <- function(x) {
  # Without any observed value range() would warn and return Inf / -Inf.
  if (all(is.na(x))) return(rep(NA_real_, length(x)))

  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]
  if (span == 0) return(rep(0, length(x)))

  # Subtract the minimum only. Subtracting the whole c(min, max) vector would
  # recycle it and shift every second element by the maximum instead.
  (x - rng[1]) / span
}

# Step 1: normalise the three component metrics. The Gini coefficient is
# inverted first so that a higher score means a more even contribution.
# Step 2: average the components into the index and cut it into four
# equal-sized groups with ntile().
project_metrics <- project_metrics |>
  mutate(
    balance_score   = minmax_norm(1 - gini_contribution),
    stigmergy_score = minmax_norm(succession_ratio),
    sync_score      = minmax_norm(shared_activity_index)
  ) |>
  mutate(
    teamwork_index = (balance_score + stigmergy_score + sync_score) / 3,
    teamwork_tier  = factor(
      ntile(teamwork_index, 4),
      levels = 1:4,
      labels = c("Q1_low", "Q2", "Q3", "Q4_high")
    )
  )

glimpse(project_metrics)

# Per-tier averages of the project metrics; the response time is summarised
# with the median, ignoring projects where it is NA.
tier_summary <- summarise(
  group_by(project_metrics, teamwork_tier),
  n_projects      = n(),
  avg_authors     = mean(n_authors),
  avg_commits     = mean(n_commits),
  avg_gini        = mean(gini_contribution),
  avg_succession  = mean(succession_ratio),
  avg_shared      = mean(shared_activity_index),
  avg_refactoring = mean(refactoring_ratio),
  median_resp_min = median(mean_response_min, na.rm = TRUE),
  .groups = "drop"
)

# Render the summary as a table rounded to three digits.
tier_summary |>
  kable(
    digits = 3,
    caption = "Характеристики проектов по квартилям командности"
  )

glimpse(tier_summary)

#---------------

# Box plot of succession_ratio for each teamwork tier; the white diamond
# marks the tier mean. The legend is hidden because the x axis already names
# the tiers.
project_metrics |>
  ggplot(aes(teamwork_tier, succession_ratio, fill = teamwork_tier)) +
  geom_boxplot(alpha = 0.7, outlier.size = 0.8, outlier.alpha = 0.4) +
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 18,
    size = 3,
    colour = "white"
  ) +
  scale_fill_manual(
    values = c("#c0392b", "#e67e22", "#2980b9", "#27ae60")
  ) +
  labs(
    title    = "Стигмергический обмен следами по уровням командности",
    subtitle = "Succession ratio = доля коммитов, где следующий автор — другой участник",
    x        = "Уровень командности (IMOI–Stigmergy)",
    y        = "Succession ratio"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

#----  Компоненты индекса командности

# Mean of each normalised component per tier, reshaped to long form
# (one row per tier and component) for plotting.
components_long <- project_metrics |>
  group_by(teamwork_tier) |>
  summarise(
    `Баланс участия`   = mean(balance_score,   na.rm = TRUE),
    `Stigmergy (след)` = mean(stigmergy_score, na.rm = TRUE),
    `Синхронность`     = mean(sync_score,      na.rm = TRUE),
    .groups = "drop"
  ) |>
  pivot_longer(
    cols      = -teamwork_tier,
    names_to  = "component",
    values_to = "value"
  )

# Grouped bar chart: one bar per component within every tier.
components_long |>
  ggplot(aes(teamwork_tier, value, fill = component)) +
  geom_col(position = "dodge", alpha = 0.85) +
  scale_fill_manual(values = c("#2980b9", "#27ae60", "#e67e22")) +
  labs(
    title    = "Компоненты индекса командности по квартилям",
    subtitle = "IMOI–Stigmergy: баланс участия, стигмергия, синхронность",
    x        = "Уровень командности",
    y        = "Нормированное значение (0–1)",
    fill     = "Компонент"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

# --  Радар-график профилей квартилей

library(fmsb)

# Mean normalised component scores per tier. Projects without a tier (NA)
# are dropped: row names cannot be NA, and the colour vectors below list
# exactly one colour per real tier.
radar_data <- project_metrics |>
  filter(!is.na(teamwork_tier)) |>
  group_by(teamwork_tier) |>
  summarise(
    balance   = mean(balance_score,   na.rm = TRUE),
    stigmergy = mean(stigmergy_score, na.rm = TRUE),
    sync      = mean(sync_score,      na.rm = TRUE),
    .groups = "drop"
  )

# The first two rows passed to radarchart() give the axis maximum and
# minimum; the remaining rows are the series to draw. The tibble is converted
# to a plain data.frame because setting row names on a tibble is deprecated
# and triggers a warning.
radar_df <- rbind(
  max = c(1, 1, 1),
  min = c(0, 0, 0),
  radar_data |>
    select(balance, stigmergy, sync) |>
    as.data.frame()
)
rownames(radar_df) <- c("max", "min", as.character(radar_data$teamwork_tier))

radarchart(radar_df,
           axistype = 1,
           pcol = c("red","orange","blue","darkgreen"),
           plwd = 2, plty = 1,
           title = "Профили командности IMOI–Stigmergy по квартилям")
legend("topright", legend = as.character(radar_data$teamwork_tier),
       col = c("red","orange","blue","darkgreen"), lwd = 2, bty = "n")

#------------------- Баланс вклада vs стигмергия

# Keep only projects where both plotted metrics are present and relabel the
# tiers for the legend (underscore replaced by a space).
plot_data <- project_metrics |>
  filter(!(is.na(gini_contribution) | is.na(succession_ratio))) |>
  mutate(
    tier = factor(
      teamwork_tier,
      levels = c("Q1_low","Q2","Q3","Q4_high"),
      labels = c("Q1 low","Q2","Q3","Q4 high")
    )
  )

# Scatter of Gini vs succession ratio coloured by tier, with a single LOESS
# trend over all points (group = 1) and two dashed vertical lines at
# Gini = 0.20 and 0.50 separating the annotated zones A, B and C.
plot_data |>
  ggplot(aes(gini_contribution, succession_ratio, color = tier)) +
  geom_point(alpha = 0.35, size = 1.2) +
  geom_smooth(
    aes(group = 1),
    method = "loess",
    formula = y ~ x,
    se = TRUE,
    color = "black",
    linewidth = 1.2,
    linetype = "solid"
  ) +
  geom_vline(
    xintercept = 0.20,
    linetype = "dashed",
    color = "steelblue",
    linewidth = 0.8
  ) +
  geom_vline(
    xintercept = 0.50,
    linetype = "dashed",
    color = "tomato",
    linewidth = 0.8
  ) +
  annotate(
    "text", x = 0.10, y = 0.97,
    label = "Zone A\nequal",
    color = "steelblue", size = 3.2, fontface = "italic"
  ) +
  annotate(
    "text", x = 0.34, y = 0.97,
    label = "Zone B\noptimal",
    color = "grey20", size = 3.2, fontface = "bold"
  ) +
  annotate(
    "text", x = 0.70, y = 0.97,
    label = "Zone C\ndominance",
    color = "tomato", size = 3.2, fontface = "italic"
  ) +
  scale_color_manual(
    values = c(
      "Q1 low"  = "#c0392b",
      "Q2"      = "#e67e22",
      "Q3"      = "#2980b9",
      "Q4 high" = "#27ae60"
    ),
    name = "Teamwork tier"
  ) +
  labs(
    title   = "Contribution balance vs Stigmergic exchange",
    subtitle= "Gini coefficient vs Succession ratio | N ≈ 3,400 GitLab-проектов",
    x       = "Gini of contribution (0 = equal, 1 = one author does all)",
    y       = "Succession ratio (fraction of commits: next author != previous)",
    caption = "Black line = LOESS trend; dashed lines = zone boundaries"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title      = element_text(face = "bold"),
    plot.subtitle   = element_text(color = "grey40"),
    legend.position = "right"
  )