Обсуждение:Как посчитать метрики командности (GitLab): различия между версиями

Материал из Поле цифровой дидактики
Новая страница: «== Только код == <syntaxhighlight lang="R" line> library(tidyverse) library(lubridate) library(ineq) library(scales) library(knitr) URL <- paste0( "https://raw.githubusercontent.com/patarakin/stat-data/", "1118a56e7544839d7df91a60df2a25ba577c4dd4/datasets/csv/df_rich_team.csv" ) df <- read_csv(URL, show_col_types = FALSE) |> mutate( commit_time = ymd_hms(commit_time, tz = "UTC"), commit_date = as.Date(commit_time) ) glimpse(df) #...»
 
Строка 1: Строка 1:
== Только код ==
== Только код ==
<syntaxhighlight lang="R" line>
<syntaxhighlight lang="R" line>
library(tidyverse)
library(tidyverse)
library(lubridate)
library(lubridate)

Версия от 19:34, 16 марта 2026

Только код

<syntaxhighlight lang="R" line>

# Attach the packages used by the script. Each call must be on its own line:
# several library() calls on one line without separators do not parse.
library(tidyverse)
library(lubridate)
library(ineq)
library(scales)
library(knitr)

# CSV location; the path contains a full commit hash, so the file contents
# are pinned to that revision of the repository.
URL <- paste0(
  "https://raw.githubusercontent.com/patarakin/stat-data/",
  "1118a56e7544839d7df91a60df2a25ba577c4dd4/datasets/csv/df_rich_team.csv"
)

# Read the data, parse commit_time as a UTC date-time and derive the
# calendar date of each commit from it.
df <- read_csv(URL, show_col_types = FALSE) |>
  mutate(
    commit_time = ymd_hms(commit_time, tz = "UTC"),
    commit_date = as.Date(commit_time)
  )

glimpse(df)

# -- Functions

# Gini coefficient of a vector of contribution sizes.
#
# changes_vec: numeric vector of contribution sizes.
# Returns 0 when the vector has fewer than two elements or sums to zero;
# otherwise the value of ineq::Gini(changes_vec).
gini_contrib <- function(changes_vec) {
  too_few <- length(changes_vec) < 2
  if (too_few || sum(changes_vec) == 0) {
    return(0)
  }
  ineq::Gini(changes_vec)
}

# Share of consecutive element pairs in which the author changes.
#
# authors_vec: vector of author identifiers, in the order to be compared.
# Returns NA_real_ for fewer than two elements; otherwise the number of
# positions where an element differs from its predecessor, divided by the
# number of consecutive pairs.
succession_ratio <- function(authors_vec) {
  n_commits <- length(authors_vec)
  if (n_commits <= 1) {
    return(NA_real_)
  }
  previous <- authors_vec[seq_len(n_commits - 1)]
  following <- authors_vec[seq(2, n_commits)]
  handoffs <- following != previous
  sum(handoffs) / length(handoffs)
}

# Burstiness of a series of timestamps: (sd - mean) / (sd + mean) of the
# strictly positive gaps (in seconds) between consecutive sorted timestamps.
#
# times_vec: vector of timestamps; it is sorted inside the function.
# Returns NA_real_ when there are fewer than three timestamps, fewer than two
# positive gaps, or when sd + mean is zero.
burstiness <- function(times_vec) {
  ordered_times <- sort(times_vec)
  if (length(ordered_times) < 3) {
    return(NA_real_)
  }

  gaps <- as.numeric(diff(ordered_times), units = "secs")
  # Zero-length gaps (identical timestamps) are excluded from the statistics.
  positive_gaps <- gaps[gaps > 0]
  if (length(positive_gaps) < 2) {
    return(NA_real_)
  }

  gap_mean <- mean(positive_gaps)
  gap_sd <- sd(positive_gaps)
  denominator <- gap_sd + gap_mean
  if (denominator == 0) {
    return(NA_real_)
  }
  (gap_sd - gap_mean) / denominator
}

# Share of author pairs that have at least one activity date in common.
#
# author_vec: author identifier for each record.
# date_vec:   date for each record, parallel to author_vec.
# Returns 0 when there are fewer than two distinct authors; otherwise the
# number of author pairs with a non-empty intersection of dates divided by
# the total number of author pairs.
shared_activity_index <- function(author_vec, date_vec) {
  authors <- unique(author_vec)
  if (length(authors) < 2) {
    return(0)
  }

  dates_by_author <- split(date_vec, author_vec)

  # split() names its groups by author label, so look groups up by character
  # key. Indexing with a numeric author id would select by position instead,
  # returning the wrong group or failing with "subscript out of bounds".
  author_keys <- as.character(authors)
  pairs <- combn(author_keys, 2, simplify = FALSE)

  shares_a_date <- vapply(pairs, function(pair) {
    common <- intersect(dates_by_author[[pair[1]]], dates_by_author[[pair[2]]])
    length(common) > 0
  }, logical(1))

  sum(shares_a_date) / length(pairs)
}

# Mean time, in minutes, between consecutive records whose authors differ.
#
# authors_vec: author identifier for each record, in the order to be compared.
# times_vec:   timestamp for each record, parallel to authors_vec.
# Returns NA_real_ when there are fewer than two records or no position where
# the author changes; otherwise the mean gap in minutes over those positions.
mean_response_minutes <- function(authors_vec, times_vec) {
  n <- length(authors_vec)
  if (n < 2) {
    return(NA_real_)
  }

  later <- 2:n
  earlier <- seq_len(n - 1)

  # Compute all consecutive gaps at once instead of growing a vector in a loop.
  gaps_min <- as.numeric(
    difftime(times_vec[later], times_vec[earlier], units = "mins")
  )

  # which() drops comparisons that are NA (a missing author on either side),
  # so a missing author is skipped rather than stopping the function.
  handoff <- which(authors_vec[later] != authors_vec[earlier])
  if (length(handoff) == 0) {
    return(NA_real_)
  }
  mean(gaps_min[handoff])
}

# --------------------------

# Sum of total_changes for every (project_id, author_anon) combination.
author_contrib <- summarise(
  group_by(df, project_id, author_anon),
  author_changes = sum(total_changes),
  .groups = "drop"
)

# One row per project: gini_contrib() applied to that project's per-author
# sums computed above.
gini_by_project <- summarise(
  group_by(author_contrib, project_id),
  gini_contribution = gini_contrib(author_changes),
  .groups = "drop"
)

# Per-project metrics. Rows are sorted by project and commit time before
# grouping, so the order-dependent helpers (succession_ratio,
# mean_response_minutes) see each project's commits chronologically.
project_metrics <- df |>
  arrange(project_id, commit_time) |>
  group_by(project_id) |>
  summarise(
    # Inputs
    n_authors = n_distinct(author_anon),
    n_commits = n(),
    total_changes = sum(total_changes),
    duration_days = as.numeric(
      difftime(max(commit_time), min(commit_time), units = "days")
    ),

    # Action processes
    succession_ratio = succession_ratio(author_anon),
    burstiness = burstiness(commit_time),
    # Reuses n_commits and duration_days from above; the 0.01-day floor
    # avoids division by zero when all commits share one timestamp.
    commit_rate_per_day = n_commits / pmax(duration_days, 0.01),

    # Emergent states
    shared_activity_index = shared_activity_index(author_anon, commit_date),

    # Output: maturity of the work.
    # total_changes here is already the per-project sum defined above.
    refactoring_ratio = sum(deletions) / pmax(total_changes, 1),

    # Stigmergy: response speed
    mean_response_min = mean_response_minutes(author_anon, commit_time),

    .groups = "drop"
  ) |>
  left_join(gini_by_project, by = "project_id")

glimpse(project_metrics)

# Нормируем ключевые метрики и агрегируем их в индекс командности.

# Min-max normalisation: rescale x linearly so that its smallest non-missing
# value maps to 0 and its largest to 1.
#
# x: numeric vector; NA values are ignored when finding the range and stay NA
#    in the result.
# Returns a numeric vector of the same length as x. If x has no non-missing
# values the result is all NA; if all non-missing values are equal the result
# is all zeros.
minmax_norm <- function(x) {
  # Without this guard range() would warn and return c(Inf, -Inf).
  if (all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }

  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]
  if (span == 0) {
    return(rep(0, length(x)))
  }

  # Subtract the minimum only. Subtracting the whole length-2 range would
  # recycle it and shift every second element by the maximum instead.
  (x - rng[1]) / span
}

# Normalised component scores: balance is the normalised (1 - Gini), the
# other two are the normalised succession ratio and shared-activity index.
project_metrics <- project_metrics |>
  mutate(
    balance_score = minmax_norm(1 - gini_contribution),
    stigmergy_score = minmax_norm(succession_ratio),
    sync_score = minmax_norm(shared_activity_index)
  ) |>
  # The index is the unweighted mean of the three scores; projects are then
  # split into four ntile() groups labelled from Q1_low to Q4_high.
  mutate(
    teamwork_index = (balance_score + stigmergy_score + sync_score) / 3,
    teamwork_tier = factor(
      ntile(teamwork_index, 4),
      levels = 1:4,
      labels = c("Q1_low", "Q2", "Q3", "Q4_high")
    )
  )

glimpse(project_metrics)

# Per-tier summary: project count, means of the main metrics, and the median
# (ignoring NA) of the mean response time.
tier_summary <- summarise(
  group_by(project_metrics, teamwork_tier),
  n_projects = n(),
  avg_authors = mean(n_authors),
  avg_commits = mean(n_commits),
  avg_gini = mean(gini_contribution),
  avg_succession = mean(succession_ratio),
  avg_shared = mean(shared_activity_index),
  avg_refactoring = mean(refactoring_ratio),
  median_resp_min = median(mean_response_min, na.rm = TRUE),
  .groups = "drop"
)

# Render the summary as a table rounded to three digits.
kable(
  tier_summary,
  caption = "Характеристики проектов по квартилям командности",
  digits = 3
)

glimpse(tier_summary)

# ---------------

# Box plot of succession_ratio for each teamwork tier; the white diamond
# (shape 18) marks the mean of each tier.
ggplot(
  data = project_metrics,
  mapping = aes(
    x = teamwork_tier,
    y = succession_ratio,
    fill = teamwork_tier
  )
) +
  geom_boxplot(alpha = 0.7, outlier.size = 0.8, outlier.alpha = 0.4) +
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 18,
    size = 3,
    colour = "white"
  ) +
  labs(
    title = "Стигмергический обмен следами по уровням командности",
    subtitle = "Succession ratio = доля коммитов, где следующий автор — другой участник",
    x = "Уровень командности (IMOI–Stigmergy)",
    y = "Succession ratio"
  ) +
  scale_fill_manual(
    values = c("#c0392b", "#e67e22", "#2980b9", "#27ae60")
  ) +
  theme_minimal(base_size = 13) +
  # The fill legend repeats the x axis, so it is hidden.
  theme(legend.position = "none")
# ---- Компоненты индекса командности

# Mean of each normalised component score per tier, reshaped to long form
# (one row per tier/component pair) for a dodged bar chart.
components_long <- pivot_longer(
  summarise(
    group_by(project_metrics, teamwork_tier),
    `Баланс участия` = mean(balance_score, na.rm = TRUE),
    `Stigmergy (след)` = mean(stigmergy_score, na.rm = TRUE),
    `Синхронность` = mean(sync_score, na.rm = TRUE),
    .groups = "drop"
  ),
  cols = -teamwork_tier,
  names_to = "component",
  values_to = "value"
)

# Side-by-side bars: one bar per component within each tier.
ggplot(
  data = components_long,
  mapping = aes(x = teamwork_tier, y = value, fill = component)
) +
  geom_col(position = "dodge", alpha = 0.85) +
  labs(
    title = "Компоненты индекса командности по квартилям",
    subtitle = "IMOI–Stigmergy: баланс участия, стигмергия, синхронность",
    x = "Уровень командности",
    y = "Нормированное значение (0–1)",
    fill = "Компонент"
  ) +
  scale_fill_manual(values = c("#2980b9", "#27ae60", "#e67e22")) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")
# -- Радар-график профилей квартилей

library(fmsb)

# Mean normalised component scores per teamwork tier. Rows without a tier
# (teamwork_tier is NA) are dropped, because NA is not a valid row name for
# the data frame built below.
radar_data <- project_metrics |>
  filter(!is.na(teamwork_tier)) |>
  group_by(teamwork_tier) |>
  summarise(
    balance = mean(balance_score, na.rm = TRUE),
    stigmergy = mean(stigmergy_score, na.rm = TRUE),
    sync = mean(sync_score, na.rm = TRUE),
    .groups = "drop"
  )

# Input for radarchart(): row 1 holds the axis maxima, row 2 the axis minima,
# and the remaining rows hold one profile per tier. It is built as a plain
# data.frame so that row names can be assigned.
radar_df <- rbind(
  data.frame(balance = c(1, 0), stigmergy = c(1, 0), sync = c(1, 0)),
  as.data.frame(select(radar_data, balance, stigmergy, sync))
)
# This assignment must be a statement of its own; placing it on the same line
# as the closing parenthesis of rbind() does not parse.
rownames(radar_df) <- c(
  "max", "min", as.character(radar_data$teamwork_tier)
)

radarchart(
  radar_df,
  axistype = 1,
  pcol = c("red", "orange", "blue", "darkgreen"),
  plwd = 2,
  plty = 1,
  title = "Профили командности IMOI–Stigmergy по квартилям"
)

# Legend colours are listed in the same order as the profile rows.
legend(
  "topright",
  legend = as.character(radar_data$teamwork_tier),
  col = c("red", "orange", "blue", "darkgreen"),
  lwd = 2,
  bty = "n"
)
# ------------------- Баланс вклада vs стигмергия

# Projects with both metrics present; tier labels are rewritten with spaces
# ("Q1 low", "Q4 high") for display in the legend.
plot_data <- mutate(
  filter(
    project_metrics,
    !is.na(gini_contribution),
    !is.na(succession_ratio)
  ),
  tier = factor(
    teamwork_tier,
    levels = c("Q1_low", "Q2", "Q3", "Q4_high"),
    labels = c("Q1 low", "Q2", "Q3", "Q4 high")
  )
)

# Scatter of Gini (x) against succession ratio (y), coloured by tier, with a
# single LOESS trend over all points and two dashed vertical lines at
# x = 0.20 and x = 0.50 separating the labelled zones A, B and C.
ggplot(
  data = plot_data,
  mapping = aes(
    x = gini_contribution,
    y = succession_ratio,
    color = tier
  )
) +
  geom_point(alpha = 0.35, size = 1.2) +
  # group = 1 overrides the colour grouping so one curve is fitted overall.
  geom_smooth(
    aes(group = 1),
    method = "loess",
    formula = y ~ x,
    se = TRUE,
    color = "black",
    linewidth = 1.2,
    linetype = "solid"
  ) +
  geom_vline(
    xintercept = 0.20,
    linetype = "dashed",
    color = "steelblue",
    linewidth = 0.8
  ) +
  geom_vline(
    xintercept = 0.50,
    linetype = "dashed",
    color = "tomato",
    linewidth = 0.8
  ) +
  annotate(
    "text",
    x = 0.10, y = 0.97,
    label = "Zone A\nequal",
    color = "steelblue",
    size = 3.2,
    fontface = "italic"
  ) +
  annotate(
    "text",
    x = 0.34, y = 0.97,
    label = "Zone B\noptimal",
    color = "grey20",
    size = 3.2,
    fontface = "bold"
  ) +
  annotate(
    "text",
    x = 0.70, y = 0.97,
    label = "Zone C\ndominance",
    color = "tomato",
    size = 3.2,
    fontface = "italic"
  ) +
  labs(
    title = "Contribution balance vs Stigmergic exchange",
    subtitle = "Gini coefficient vs Succession ratio | N ≈ 3,400 GitLab-проектов",
    x = "Gini of contribution (0 = equal, 1 = one author does all)",
    y = "Succession ratio (fraction of commits: next author != previous)",
    caption = "Black line = LOESS trend; dashed lines = zone boundaries"
  ) +
  scale_color_manual(
    name = "Teamwork tier",
    values = c(
      "Q1 low" = "#c0392b",
      "Q2" = "#e67e22",
      "Q3" = "#2980b9",
      "Q4 high" = "#27ae60"
    )
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(color = "grey40"),
    legend.position = "right"
  )