|
|
| Строка 1: |
Строка 1: |
| == Только код ==
| |
|
| |
|
| <syntaxhighlight lang="R" line>
| |
|
| |
| library(tidyverse)
| |
| library(lubridate)
| |
| library(ineq)
| |
| library(scales)
| |
| library(knitr)
| |
|
| |
| URL <- paste0(  # raw CSV location, assembled from two string pieces
| |
| "https://raw.githubusercontent.com/patarakin/stat-data/",
| |
| "1118a56e7544839d7df91a60df2a25ba577c4dd4/datasets/csv/df_rich_team.csv"  # NOTE(review): first path segment looks like a pinned commit SHA - confirm
| |
| )
| |
|
| |
| df <- read_csv(URL, show_col_types = FALSE) |>  # download and parse; column-type message suppressed
| |
| mutate(
| |
| commit_time = ymd_hms(commit_time, tz = "UTC"),  # parse the timestamp column as UTC date-times
| |
| commit_date = as.Date(commit_time)  # calendar date of each commit
| |
| )
| |
|
| |
| glimpse(df)  # print a column overview of the loaded data
| |
|
| |
| ##-- Functions
| |
| # Gini coefficient of contribution sizes.
| |
| # Returns 0 when there are fewer than two entries or the entries sum to
| |
| # zero; otherwise delegates to ineq::Gini().
| |
| gini_contrib <- function(changes_vec) {
| |
|   has_spread <- length(changes_vec) >= 2 && sum(changes_vec) != 0
| |
|   if (!has_spread) {
| |
|     return(0)
| |
|   }
| |
|   ineq::Gini(changes_vec)
| |
| }
| |
|
| |
| # Fraction of consecutive element pairs whose values differ.
| |
| # authors_vec is taken in the order given; returns NA_real_ when it has
| |
| # fewer than two elements.
| |
| succession_ratio <- function(authors_vec) {
| |
|   n_commits <- length(authors_vec)
| |
|   if (n_commits < 2) {
| |
|     return(NA_real_)
| |
|   }
| |
|   previous_author <- authors_vec[-n_commits]
| |
|   next_author <- authors_vec[-1]
| |
|   handoffs <- next_author != previous_author
| |
|   sum(handoffs) / (n_commits - 1)
| |
| }
| |
|
| |
| # Burstiness of a set of time stamps: (sd - mean) / (sd + mean) of the
| |
| # strictly positive gaps (in seconds) between sorted time stamps.
| |
| # Returns NA_real_ with fewer than three time stamps, fewer than two
| |
| # positive gaps, or a zero denominator.
| |
| burstiness <- function(times_vec) {
| |
|   ordered_times <- sort(times_vec)
| |
|   if (length(ordered_times) < 3) {
| |
|     return(NA_real_)
| |
|   }
| |
|   gaps <- as.numeric(diff(ordered_times), units = "secs")
| |
|   positive_gaps <- gaps[gaps > 0]
| |
|   if (length(positive_gaps) < 2) {
| |
|     return(NA_real_)
| |
|   }
| |
|   gap_mean <- mean(positive_gaps)
| |
|   gap_sd <- sd(positive_gaps)
| |
|   denominator <- gap_sd + gap_mean
| |
|   if (denominator == 0) {
| |
|     return(NA_real_)
| |
|   }
| |
|   (gap_sd - gap_mean) / denominator
| |
| }
| |
|
| |
| # Share of author pairs that have at least one date in common.
| |
| # author_vec: author id per commit; date_vec: date per commit, aligned
| |
| # with author_vec. Returns 0 when there are fewer than two distinct
| |
| # authors, otherwise (pairs sharing a date) / (number of pairs).
| |
| shared_activity_index <- function(author_vec, date_vec) {
| |
|   # split() labels its groups with character names, so look groups up by
| |
|   # character key; a numeric id passed to [[ ]] would index by position.
| |
|   author_keys <- as.character(author_vec)
| |
|   authors <- unique(author_keys)
| |
|   if (length(authors) < 2) {
| |
|     return(0)
| |
|   }
| |
|
| |
|   dates_by_author <- split(date_vec, author_keys)
| |
|   pairs <- combn(authors, 2, simplify = FALSE)
| |
|
| |
|   pair_overlaps <- vapply(pairs, function(p) {
| |
|     any(dates_by_author[[p[1]]] %in% dates_by_author[[p[2]]])
| |
|   }, logical(1))
| |
|
| |
|   sum(pair_overlaps) / length(pairs)
| |
| }
| |
|
| |
| # Mean gap, in minutes, between consecutive entries whose author differs.
| |
| # authors_vec and times_vec are aligned and taken in the order given.
| |
| # Returns NA_real_ with fewer than two entries or when no author change
| |
| # occurs.
| |
| mean_response_minutes <- function(authors_vec, times_vec) {
| |
|   n <- length(authors_vec)
| |
|   if (n < 2) {
| |
|     return(NA_real_)
| |
|   }
| |
|   # Positions (2..n) where the author differs from the previous entry.
| |
|   # which() skips comparisons involving NA instead of failing on them.
| |
|   handoff_idx <- which(authors_vec[-1] != authors_vec[-n]) + 1L
| |
|   if (length(handoff_idx) == 0) {
| |
|     return(NA_real_)
| |
|   }
| |
|   intervals <- as.numeric(difftime(times_vec[handoff_idx],
| |
|                                    times_vec[handoff_idx - 1L],
| |
|                                    units = "mins"))
| |
|   mean(intervals)
| |
| }
| |
|
| |
| #--------------------------
| |
| # Total changes per (project, author) pair.
| |
| author_contrib <- summarise(
| |
|   group_by(df, project_id, author_anon),
| |
|   author_changes = sum(total_changes),
| |
|   .groups = "drop"
| |
| )
| |
|
| |
| # Gini of those per-author totals, one row per project.
| |
| gini_by_project <- summarise(
| |
|   group_by(author_contrib, project_id),
| |
|   gini_contribution = gini_contrib(author_changes),
| |
|   .groups = "drop"
| |
| )
| |
|
| |
| project_metrics <- df |>  # one row of team-process metrics per project
| |
| arrange(project_id, commit_time) |>  # chronological order within a project; the adjacent-commit helpers below compare neighbours
| |
| group_by(project_id) |>
| |
| summarise(
| |
| # Inputs: team size, commit count, change volume, time span
| |
| n_authors = n_distinct(author_anon),
| |
| n_commits = n(),
| |
| total_changes = sum(total_changes),
| |
| duration_days = as.numeric(
| |
| difftime(max(commit_time), min(commit_time), units = "days")
| |
| ),
| |
|
| |
| # Action processes: author hand-offs, timing irregularity, commit rate
| |
| succession_ratio = succession_ratio(author_anon),
| |
| burstiness = burstiness(commit_time),
| |
| commit_rate_per_day = n() / pmax(  # span floored at 0.01 days so a zero-length span cannot divide by zero
| |
| as.numeric(difftime(max(commit_time),
| |
| min(commit_time), units = "days")),
| |
| 0.01
| |
| ),
| |
|
| |
| # Emergent states: share of author pairs active on a common date
| |
| shared_activity_index =
| |
| shared_activity_index(author_anon, commit_date),
| |
|
| |
| # Output: maturity of the work (deletions as a share of total changes)
| |
| refactoring_ratio = sum(deletions) / pmax(sum(total_changes), 1),  # NOTE(review): total_changes was redefined above as the per-project sum, so sum() here presumably sees that summary value - confirm
| |
|
| |
| # Stigmergy: response speed (mean minutes between commits by different authors)
| |
| mean_response_min =
| |
| mean_response_minutes(author_anon, commit_time),
| |
|
| |
| .groups = "drop"
| |
| ) |>
| |
| left_join(gini_by_project, by = "project_id")  # attach gini_contribution per project
| |
|
| |
| glimpse(project_metrics)  # print a column overview
| |
|
| |
| # Нормируем ключевые метрики и агрегируем их в индекс командности.
| |
|
| |
| # Min-max rescale x to [0, 1]; NA values are ignored when finding the
| |
| # range and stay NA in the result.
| |
| # Returns all zeros for a constant vector and all NA when x has no
| |
| # non-missing values. The result has the same length as x.
| |
| minmax_norm <- function(x) {
| |
|   if (all(is.na(x))) {
| |
|     # Nothing to scale; also avoids range()'s warning on empty input.
| |
|     return(rep(NA_real_, length(x)))
| |
|   }
| |
|   rng <- range(x, na.rm = TRUE)
| |
|   span <- rng[2] - rng[1]
| |
|   if (span == 0) {
| |
|     return(rep(0, length(x)))
| |
|   }
| |
|   # Subtract the scalar minimum only: subtracting the whole length-2
| |
|   # range would recycle min and max across alternating elements.
| |
|   (x - rng[1]) / span
| |
| }
| |
|
| |
| # Rescale the three components, average them into teamwork_index and
| |
| # bin projects into four labelled tiers with ntile().
| |
| project_metrics <- mutate(
| |
|   project_metrics,
| |
|   balance_score = minmax_norm(1 - gini_contribution),
| |
|   stigmergy_score = minmax_norm(succession_ratio),
| |
|   sync_score = minmax_norm(shared_activity_index),
| |
|   teamwork_index = (balance_score + stigmergy_score + sync_score) / 3,
| |
|   teamwork_tier = factor(
| |
|     ntile(teamwork_index, 4),
| |
|     levels = 1:4,
| |
|     labels = c("Q1_low", "Q2", "Q3", "Q4_high")
| |
|   )
| |
| )
| |
|
| |
| glimpse(project_metrics)
| |
|
| |
| # Per-tier project counts and averages of the project metrics.
| |
| tier_summary <- summarise(
| |
|   group_by(project_metrics, teamwork_tier),
| |
|   n_projects = n(),
| |
|   avg_authors = mean(n_authors),
| |
|   avg_commits = mean(n_commits),
| |
|   avg_gini = mean(gini_contribution),
| |
|   avg_succession = mean(succession_ratio),
| |
|   avg_shared = mean(shared_activity_index),
| |
|   avg_refactoring = mean(refactoring_ratio),
| |
|   median_resp_min = median(mean_response_min, na.rm = TRUE),
| |
|   .groups = "drop"
| |
| )
| |
|
| |
| # Render the summary as a table rounded to three digits.
| |
| kable(
| |
|   tier_summary,
| |
|   caption = "Характеристики проектов по квартилям командности",
| |
|   digits = 3
| |
| )
| |
|
| |
| glimpse(tier_summary)
| |
|
| |
| #--------------- Box plot: succession ratio by teamwork tier
| |
|
| |
| ggplot(project_metrics,
| |
| aes(x = teamwork_tier, y = succession_ratio,
| |
| fill = teamwork_tier)) +
| |
| geom_boxplot(alpha = 0.7, outlier.size = 0.8, outlier.alpha = 0.4) +  # semi-transparent boxes, small faint outliers
| |
| stat_summary(fun = mean, geom = "point", shape = 18,  # overlay each tier's mean as a point
| |
| size = 3, colour = "white") +
| |
| scale_fill_manual(values = c("#c0392b","#e67e22","#2980b9","#27ae60")) +  # four fill colours, one per tier level
| |
| labs(
| |
| title = "Стигмергический обмен следами по уровням командности",
| |
| subtitle = "Succession ratio = доля коммитов, где следующий автор — другой участник",
| |
| x = "Уровень командности (IMOI–Stigmergy)",
| |
| y = "Succession ratio"
| |
| ) +
| |
| theme_minimal(base_size = 13) +
| |
| theme(legend.position = "none")  # fill repeats the x axis, so the legend is hidden
| |
|
| |
| #---- Teamwork-index components: mean normalised score per tier
| |
|
| |
| components_long <- project_metrics |>  # tier-level means, reshaped to one row per (tier, component)
| |
| group_by(teamwork_tier) |>
| |
| summarise(
| |
| `Баланс участия` = mean(balance_score, na.rm = TRUE),  # column names double as the legend labels after pivoting
| |
| `Stigmergy (след)` = mean(stigmergy_score, na.rm = TRUE),
| |
| `Синхронность` = mean(sync_score, na.rm = TRUE),
| |
| .groups = "drop"
| |
| ) |>
| |
| pivot_longer(-teamwork_tier,  # every column except the tier becomes a component/value pair
| |
| names_to = "component", values_to = "value")
| |
|
| |
| ggplot(components_long,
| |
| aes(x = teamwork_tier, y = value, fill = component)) +
| |
| geom_col(position = "dodge", alpha = 0.85) +  # side-by-side bars, one per component within each tier
| |
| scale_fill_manual(values = c("#2980b9","#27ae60","#e67e22")) +  # three fill colours, one per component
| |
| labs(
| |
| title = "Компоненты индекса командности по квартилям",
| |
| subtitle = "IMOI–Stigmergy: баланс участия, стигмергия, синхронность",
| |
| x = "Уровень командности",
| |
| y = "Нормированное значение (0–1)",
| |
| fill = "Компонент"
| |
| ) +
| |
| theme_minimal(base_size = 13) +
| |
| theme(legend.position = "top")
| |
|
| |
| # -- Radar chart of the tier profiles (mean normalised component scores)
| |
|
| |
| library(fmsb)  # provides radarchart()
| |
|
| |
| radar_data <- project_metrics |>  # one row per tier with the three mean component scores
| |
| group_by(teamwork_tier) |>
| |
| summarise(
| |
| balance = mean(balance_score, na.rm = TRUE),
| |
| stigmergy = mean(stigmergy_score, na.rm = TRUE),
| |
| sync = mean(sync_score, na.rm = TRUE),
| |
| .groups = "drop"
| |
| )
| |
|
| |
| radar_df <- rbind(  # two axis-limit rows (1 and 0) stacked on top of the tier rows
| |
| max = c(1, 1, 1),
| |
| min = c(0, 0, 0),
| |
| radar_data |> select(balance, stigmergy, sync)
| |
| )
| |
| rownames(radar_df) <- c("max", "min", as.character(radar_data$teamwork_tier))  # NOTE(review): if radar_df is still a tibble here, setting row names may raise a deprecation warning - confirm
| |
|
| |
| radarchart(radar_df,  # presumably reads rows 1-2 as the per-axis max/min, per the fmsb convention - verify against its docs
| |
| axistype = 1,
| |
| pcol = c("red","orange","blue","darkgreen"),  # four line colours; NOTE(review): an NA tier group would add a fifth data row - confirm
| |
| plwd = 2, plty = 1,
| |
| title = "Профили командности IMOI–Stigmergy по квартилям")
| |
| legend("topright", legend = as.character(radar_data$teamwork_tier),  # drawn on the current base-graphics plot, so it must follow radarchart()
| |
| col = c("red","orange","blue","darkgreen"), lwd = 2, bty = "n")
| |
|
| |
| #------------------- Contribution balance (Gini) vs stigmergy (succession ratio)
| |
|
| |
| plot_data <- project_metrics |>  # projects with both metrics present, plus display labels for the tiers
| |
| filter(!is.na(gini_contribution), !is.na(succession_ratio)) |>
| |
| mutate(
| |
| tier = factor(teamwork_tier,  # same four levels, relabelled with spaces for the legend
| |
| levels = c("Q1_low","Q2","Q3","Q4_high"),
| |
| labels = c("Q1 low","Q2","Q3","Q4 high"))
| |
| )
| |
|
| |
| ggplot(plot_data,
| |
| aes(x = gini_contribution,
| |
| y = succession_ratio,
| |
| color = tier)) +
| |
| geom_point(alpha = 0.35, size = 1.2) +  # one faint point per project
| |
| geom_smooth(aes(group = 1),  # group = 1 fits a single trend across all tiers instead of one per colour
| |
| method = "loess", formula = y ~ x,
| |
| se = TRUE, color = "black",
| |
| linewidth = 1.2, linetype = "solid") +
| |
| geom_vline(xintercept = 0.20, linetype = "dashed",  # boundary between the zones labelled A and B below
| |
| color = "steelblue", linewidth = 0.8) +
| |
| geom_vline(xintercept = 0.50, linetype = "dashed",  # boundary between the zones labelled B and C below
| |
| color = "tomato", linewidth = 0.8) +
| |
| annotate("text", x = 0.10, y = 0.97,  # zone labels placed near the top of the panel
| |
| label = "Zone A\nequal", color = "steelblue",
| |
| size = 3.2, fontface = "italic") +
| |
| annotate("text", x = 0.34, y = 0.97,
| |
| label = "Zone B\noptimal", color = "grey20",
| |
| size = 3.2, fontface = "bold") +
| |
| annotate("text", x = 0.70, y = 0.97,
| |
| label = "Zone C\ndominance", color = "tomato",
| |
| size = 3.2, fontface = "italic") +
| |
| scale_color_manual(
| |
| values = c("Q1 low" = "#c0392b",
| |
| "Q2" = "#e67e22",
| |
| "Q3" = "#2980b9",
| |
| "Q4 high" = "#27ae60"),
| |
| name = "Teamwork tier"
| |
| ) +
| |
| labs(
| |
| title = "Contribution balance vs Stigmergic exchange",
| |
| subtitle= "Gini coefficient vs Succession ratio | N ≈ 3,400 GitLab-проектов",
| |
| x = "Gini of contribution (0 = equal, 1 = one author does all)",
| |
| y = "Succession ratio (fraction of commits: next author != previous)",
| |
| caption = "Black line = LOESS trend; dashed lines = zone boundaries"
| |
| ) +
| |
| theme_minimal(base_size = 13) +
| |
| theme(
| |
| plot.title = element_text(face = "bold"),
| |
| plot.subtitle = element_text(color = "grey40"),
| |
| legend.position = "right"
| |
| )
| |
|
| |
| </syntaxhighlight>
| |