Как оценить командность в студиях Scratch: различия между версиями

Материал из Поле цифровой дидактики
Нет описания правки
Нет описания правки
 
Строка 5: Строка 5:
|Environment=R, Scratch API, Сообщество Scratch
|Environment=R, Scratch API, Сообщество Scratch
}}
}}
==  [[Командность]] ==


[[Файл:Rplot Teamwork.png]]
[[Файл:Rplot Teamwork.png]]

Текущая версия от 15:36, 29 марта 2026

Описание Мы хотим сравнить командность действий в студиях Scratch. Рецепт сделан на примере 5 студий, но его можно расширить и провести большое исследование для множества студий.
Область знаний Информатика, Искусственный интеллект
Область использования (ISTE)
Возрастная категория


Поясняющее видео
Близкие рецепту понятия Как использовать Scratch API
Среды и средства для приготовления рецепта: R, Scratch API, Сообщество Scratch


Базовые функции - Scratch_Api_core.R

######## 
##  Анализ командности в студиях Scratch - Base Functions
#### Scratch_Api_core.R

library(httr)
library(jsonlite)
library(dplyr)
library(purrr)
library(tidyr)
library(lubridate)

##------------- get_studio_projects 
# Download the project list of one Scratch studio, page by page.
#
# Arguments:
#   studio_id - studio identifier, pasted into the request URL.
#   limit     - page size sent as the `limit` query parameter.
#   verbose   - when TRUE, a progress message is emitted for every page.
#
# Returns NULL when no page produced data; otherwise the distinct rows of
# project_id, title, creator_id and username.
get_studio_projects <- function(studio_id, limit = 40, verbose = TRUE) {
  pages <- list()
  skip <- 0
  page_no <- 1

  repeat {
    request_url <- paste0(
      "https://api.scratch.mit.edu/studios/", studio_id,
      "/projects/?limit=", limit,
      "&offset=", skip
    )

    if (verbose) {
      message("Requesting page ", page_no, " (offset = ", skip, ")")
    }

    response <- GET(request_url)
    if (status_code(response) != 200) {
      # Stop paging but keep whatever was collected before the failure.
      warning("Non-200 status code: ", status_code(response), " at offset ", skip)
      break
    }

    body <- content(response, as = "text", encoding = "UTF-8")
    chunk <- fromJSON(body, flatten = TRUE)

    # An empty payload has length zero: nothing more to read.
    if (length(chunk) == 0) {
      break
    }

    pages <- c(pages, list(chunk))

    # A page shorter than `limit` is treated as the last one.
    if (nrow(chunk) < limit) {
      break
    }

    skip <- skip + limit
    page_no <- page_no + 1
  }

  if (length(pages) == 0) {
    return(NULL)
  }

  combined <- bind_rows(pages)
  selected <- transmute(
    combined,
    project_id = id,
    title      = title,
    creator_id = creator_id,
    username   = username
  )
  distinct(selected)
}


##------------- get_user_projects
# 1. проекты пользователя: /users/<username>/projects
# Collect the projects of one Scratch user, page by page.
#
# Arguments:
#   username  - Scratch user name, pasted into the request URL.
#   limit     - page size sent as the `limit` query parameter.
#   max_pages - upper bound on the number of pages requested.
#   verbose   - when TRUE, a progress message is emitted for every page.
#
# Returns NULL when no page produced data; otherwise the distinct rows of
# project_id, title and username.
get_user_projects <- function(username, limit = 40, max_pages = 10, verbose = TRUE) {
  pages <- list()
  skip <- 0
  page_no <- 1

  while (page_no <= max_pages) {
    request_url <- paste0(
      "https://api.scratch.mit.edu/users/", username,
      "/projects?limit=", limit,
      "&offset=", skip
    )
    if (verbose) {
      message("User ", username, ": page ", page_no, " (offset = ", skip, ")")
    }

    response <- GET(request_url)
    if (status_code(response) != 200) {
      break
    }

    body <- content(response, as = "text", encoding = "UTF-8")
    chunk <- fromJSON(body, flatten = TRUE)
    if (length(chunk) == 0) {
      break
    }

    pages <- c(pages, list(chunk))

    # A page shorter than `limit` is treated as the last one.
    if (nrow(chunk) < limit) {
      break
    }
    skip <- skip + limit
    page_no <- page_no + 1
  }

  if (length(pages) == 0) {
    return(NULL)
  }

  combined <- bind_rows(pages)
  selected <- transmute(
    combined,
    project_id = id,
    title      = title,
    # Data masking: this resolves to a `username` column if the response has
    # one, otherwise to the function argument of the same name.
    username   = username
  )
  distinct(selected)
}




# 2. студии проекта: /users/<username>/projects/<id>/studios
#-- get_project_studios

# List the studios that contain one project of a given user.
#
# Requests /users/<username>/projects/<project_id>/studios page by page.
#
# Arguments:
#   username   - project owner; only the first element is used, so a whole
#                column may be passed.
#   project_id - project identifier, pasted into the request URL.
#   limit      - page size sent as the `limit` query parameter.
#   max_pages  - upper bound on the number of pages requested.
#   verbose    - when TRUE, a progress message is emitted for every page.
#
# Returns NULL when nothing was collected or when the combined result lacks
# the `id` / `title` columns; otherwise the distinct rows of studio_id and
# studio_name.
get_project_studios <- function(username, project_id, limit = 40, max_pages = 10, verbose = TRUE) {
  user_name <- as.character(username)[1]  # guarantee a scalar string
  offset <- 0
  all <- list()
  page <- 1

  repeat {
    if (page > max_pages) break

    url <- paste0(
      "https://api.scratch.mit.edu/users/",
      user_name,
      "/projects/",
      project_id,
      "/studios?limit=", limit,
      "&offset=", offset
    )

    if (verbose) {
      message("User ", user_name,
              ", project ", project_id,
              ": page ", page, " (offset = ", offset, ")")
    }

    resp <- httr::GET(url)
    # Stop paging on failure, but keep the pages already collected (the
    # sibling fetchers behave the same way). A failure on the very first
    # page still ends in NULL because nothing has been stored yet.
    if (httr::status_code(resp) != 200) break

    txt <- httr::content(resp, as = "text", encoding = "UTF-8")
    dat <- jsonlite::fromJSON(txt, flatten = TRUE)

    # Anything other than a non-empty data frame cannot be paged through:
    # nrow() would be NULL and the comparison below would fail.
    if (length(dat) == 0 || !is.data.frame(dat)) break

    all[[length(all) + 1]] <- dat

    # A page shorter than `limit` is treated as the last one.
    if (nrow(dat) < limit) break
    offset <- offset + limit
    page   <- page + 1
  }

  if (length(all) == 0) return(NULL)

  res <- dplyr::bind_rows(all)
  if (!all(c("id", "title") %in% names(res))) return(NULL)

  res |>
    dplyr::transmute(
      studio_id   = id,
      studio_name = title
    ) |>
    dplyr::distinct()
}

# 3. связка "автор — его проекты — студии этих проектов"
# Link one author to the studios that hold any of their projects.
#
# Arguments:
#   username - Scratch user name, forwarded to get_user_projects().
#   verbose  - forwarded to the fetch helpers.
#
# Returns NULL when the user has no projects or none of them yielded a studio
# list; otherwise the distinct rows of username, project_id, project_title,
# studio_id and studio_name.
get_user_project_studios <- function(username, verbose = TRUE) {
  own_projects <- get_user_projects(username, verbose = verbose)
  if (is.null(own_projects) || nrow(own_projects) == 0) {
    return(NULL)
  }

  # The whole username column is handed over; get_project_studios() keeps
  # only its first element.
  owner_column <- own_projects$username
  studio_lists <- purrr::map(
    own_projects$project_id,
    function(pid) get_project_studios(owner_column, pid, verbose = verbose)
  )

  # Drop projects for which the lookup returned NULL.
  has_studios <- !purrr::map_lgl(studio_lists, is.null)
  if (!any(has_studios)) {
    return(NULL)
  }

  with_studios <- dplyr::mutate(own_projects, studios = studio_lists)
  with_studios <- dplyr::filter(with_studios, has_studios)

  expanded <- tidyr::unnest(with_studios, cols = studios)
  linked <- dplyr::transmute(
    expanded,
    username,
    project_id,
    project_title = title,
    studio_id,
    studio_name
  )
  dplyr::distinct(linked)
}


## --------


# For one studio, gather its projects and, for every author of those
# projects, the studios their own projects belong to.
#
# Arguments:
#   studio_id - studio identifier, forwarded to get_studio_projects().
#   verbose   - when TRUE, progress messages are emitted.
#
# Returns NULL when the studio has no projects; otherwise a list with
# `studio_projects` (the studio's own projects) and `author_studios`
# (row-bound results of get_user_project_studios() over the unique authors).
get_studio_author_studios <- function(studio_id, verbose = TRUE) {
  projects_in_studio <- get_studio_projects(studio_id, verbose = verbose)
  if (is.null(projects_in_studio) || nrow(projects_in_studio) == 0) {
    return(NULL)
  }

  fetch_for_author <- function(author) {
    if (verbose) {
      message("Author: ", author)
    }
    get_user_project_studios(author, verbose = verbose)
  }

  studios_of_authors <- purrr::map_df(
    unique(projects_in_studio$username),
    fetch_for_author
  )

  list(
    studio_projects = projects_in_studio,
    author_studios  = studios_of_authors
  )
}

### --- Studio comments ---

############################################################
# 0. Вспомогательная функция: безопасный GET с JSON
############################################################

# Fetch a URL and parse the body as JSON.
#
# Arguments:
#   url     - address to request.
#   verbose - when TRUE, the URL is reported with message() first.
#
# Returns NULL for a non-200 status or an empty/NULL body; otherwise the
# result of fromJSON(..., flatten = TRUE).
safe_get_json <- function(url, verbose = TRUE) {
  if (verbose) {
    message("GET ", url)
  }

  response <- GET(url)
  if (status_code(response) != 200) {
    return(NULL)
  }

  body <- content(response, as = "text", encoding = "UTF-8")
  if (is.null(body) || identical(body, "")) {
    return(NULL)
  }

  fromJSON(body, flatten = TRUE)
}

###--------

# Download the comments posted in a studio, page by page.
#
# Arguments:
#   studio_id - studio identifier, pasted into the request URL.
#   limit     - page size sent as the `limit` query parameter.
#   max_pages - upper bound on the number of pages requested.
#   verbose   - forwarded to safe_get_json().
#
# Returns NULL when no page produced data; otherwise one row per comment with
# ids, author, text, parsed timestamps, visibility and reply count.
get_studio_comments <- function(studio_id,
                                limit = 40,
                                max_pages = 200,
                                verbose = TRUE) {
  pages <- list()
  skip <- 0
  page_no <- 1

  while (page_no <= max_pages) {
    request_url <- paste0(
      "https://api.scratch.mit.edu/studios/", studio_id,
      "/comments?limit=", limit,
      "&offset=", skip
    )

    chunk <- safe_get_json(request_url, verbose = verbose)
    if (is.null(chunk) || length(chunk) == 0) {
      break
    }

    pages <- c(pages, list(chunk))

    # A page shorter than `limit` is treated as the last one.
    if (nrow(chunk) < limit) {
      break
    }

    skip <- skip + limit
    page_no <- page_no + 1
  }

  if (length(pages) == 0) {
    return(NULL)
  }

  combined <- bind_rows(pages)
  # parent_id is filled with NA when the response does not carry the column.
  has_parent <- "parent_id" %in% names(combined)

  transmute(
    combined,
    studio_id         = studio_id,
    comment_id        = id,
    parent_id         = if (has_parent) parent_id else NA,
    author_id         = `author.id`,
    author_username   = `author.username`,
    content           = content,
    datetime_created  = lubridate::as_datetime(datetime_created),
    datetime_modified = lubridate::as_datetime(datetime_modified),
    visibility        = visibility,
    reply_count       = reply_count
  )
}
###----
# Download the comments posted under one project, page by page.
#
# Arguments:
#   username   - project owner; only the first element is used.
#   project_id - project identifier, pasted into the request URL.
#   limit      - page size sent as the `limit` query parameter.
#   max_pages  - upper bound on the number of pages requested.
#   verbose    - forwarded to safe_get_json().
#
# Returns NULL when no page produced data; otherwise one row per comment with
# the project id, its owner, comment ids, author, text, parsed timestamps,
# visibility and reply count.
get_project_comments <- function(username,
                                 project_id,
                                 limit = 40,
                                 max_pages = 200,
                                 verbose = TRUE) {
  user_name <- as.character(username)[1]
  pages <- list()
  skip <- 0
  page_no <- 1

  while (page_no <= max_pages) {
    request_url <- paste0(
      "https://api.scratch.mit.edu/users/", user_name,
      "/projects/", project_id,
      "/comments?limit=", limit,
      "&offset=", skip
    )

    chunk <- safe_get_json(request_url, verbose = verbose)
    if (is.null(chunk) || length(chunk) == 0) {
      break
    }

    pages <- c(pages, list(chunk))

    # A page shorter than `limit` is treated as the last one.
    if (nrow(chunk) < limit) {
      break
    }

    skip <- skip + limit
    page_no <- page_no + 1
  }

  if (length(pages) == 0) {
    return(NULL)
  }

  combined <- bind_rows(pages)
  # parent_id is filled with NA when the response does not carry the column.
  has_parent <- "parent_id" %in% names(combined)

  transmute(
    combined,
    project_id        = project_id,
    project_owner     = user_name,
    comment_id        = id,
    parent_id         = if (has_parent) parent_id else NA,
    author_id         = `author.id`,
    author_username   = `author.username`,
    content           = content,
    datetime_created  = lubridate::as_datetime(datetime_created),
    datetime_modified = lubridate::as_datetime(datetime_modified),
    visibility        = visibility,
    reply_count       = reply_count
  )
}

### ---


# Gather the comments of every project listed for a studio.
#
# Arguments:
#   studio_id       - value written into the leading `studio_id` column.
#   studio_projects - data frame with `project_id` and `username` columns.
#   verbose         - when TRUE, progress messages are emitted.
#
# Returns NULL when there are no projects or no comments at all; otherwise the
# row-bound comments with `studio_id` placed as the first column.
get_all_project_comments_for_studio <- function(studio_id,
                                                studio_projects,
                                                verbose = TRUE) {
  if (is.null(studio_projects) || nrow(studio_projects) == 0) {
    return(NULL)
  }

  # Unique (project, owner) pairs are all that the fetcher needs.
  pairs <- distinct(
    transmute(studio_projects, project_id, project_owner = username)
  )

  fetch_one <- function(owner, pid) {
    if (verbose) {
      message("Project ", pid, " (owner: ", owner, ")")
    }
    get_project_comments(owner, pid, verbose = verbose)
  }

  collected <- purrr::map2_df(pairs$project_owner, pairs$project_id, fetch_one)

  if (nrow(collected) == 0) {
    return(NULL)
  }

  mutate(collected, studio_id = studio_id, .before = 1)
}

##------------


# Compare commenting activity in a studio with commenting activity under the
# studio's projects.
#
# Arguments:
#   studio_id       - studio identifier.
#   studio_projects - data frame of the studio's projects (`project_id`,
#                     `username`), as returned by get_studio_projects().
#   verbose         - forwarded to the fetch helpers.
#
# Returns NULL (with a warning) when neither the studio nor its projects have
# comments. Otherwise a list with the raw comments, author overlap, per-author
# frequencies, text-length tables, monthly counts and `summary_metrics`.
# When only one of the two sources has comments, the missing one is
# represented by an empty table, so its counts are zero and its mean length
# is NA.
analyze_studio_comments <- function(studio_id,
                                    studio_projects,
                                    verbose = TRUE) {
  # 4.1. Comments posted in the studio itself
  studio_comments <- get_studio_comments(studio_id, verbose = verbose)

  # 4.2. Comments under every project of the studio
  project_comments <- get_all_project_comments_for_studio(
    studio_id = studio_id,
    studio_projects = studio_projects,
    verbose = verbose
  )

  if (is.null(studio_comments) && is.null(project_comments)) {
    warning("Нет комментариев ни в студии, ни в проектах.")
    return(NULL)
  }

  # Exactly one source may still be NULL here. Replace it with an empty table
  # holding the columns used below, so the pipelines run instead of failing
  # on NULL.
  empty_comments <- tibble::tibble(
    author_username  = character(),
    content          = character(),
    datetime_created = .POSIXct(numeric(0), tz = "UTC")
  )
  if (is.null(studio_comments)) studio_comments <- empty_comments
  if (is.null(project_comments)) project_comments <- empty_comments

  # 4.3. Author sets: who writes in the studio, in projects, or in both
  authors_studio <- studio_comments |>
    distinct(author_username) |>
    mutate(where = "studio")

  authors_projects <- project_comments |>
    distinct(author_username) |>
    mutate(where = "projects")

  authors_all <- bind_rows(authors_studio, authors_projects) |>
    group_by(author_username) |>
    summarise(
      in_studio   = "studio"   %in% where,
      in_projects = "projects" %in% where,
      .groups = "drop"
    )

  # 4.4. Comment counts per author
  studio_author_freq <- studio_comments |>
    count(author_username, name = "studio_comments") |>
    arrange(desc(studio_comments))

  project_author_freq <- project_comments |>
    count(author_username, name = "project_comments") |>
    arrange(desc(project_comments))

  # 4.5. Simple text indicator: comment length in characters
  studio_text_stats <- studio_comments |>
    mutate(n_chars = nchar(content, allowNA = TRUE))

  project_text_stats <- project_comments |>
    mutate(n_chars = nchar(content, allowNA = TRUE))

  # 4.6. Dynamics over time (per calendar month)
  studio_time <- studio_comments |>
    mutate(month = floor_date(datetime_created, "month")) |>
    count(month, name = "studio_comments") |>
    arrange(month)

  project_time <- project_comments |>
    mutate(month = floor_date(datetime_created, "month")) |>
    count(month, name = "project_comments") |>
    arrange(month)

  # 4.7. Everything needed for later visualisation / analysis
  res <- list(
    studio_comments      = studio_comments,
    project_comments     = project_comments,
    authors_overlap      = authors_all,
    studio_author_freq   = studio_author_freq,
    project_author_freq  = project_author_freq,
    studio_text_stats    = studio_text_stats,
    project_text_stats   = project_text_stats,
    studio_time          = studio_time,
    project_time         = project_time
  )

  # 4.8. Summary metrics: author roles
  n_authors_total <- nrow(authors_all)
  n_only_studio   <- sum(authors_all$in_studio & !authors_all$in_projects)
  n_only_proj     <- sum(!authors_all$in_studio & authors_all$in_projects)
  n_both          <- sum(authors_all$in_studio & authors_all$in_projects)

  # Mean comment length; NA when the source has no comments at all.
  mean_length <- function(stats) {
    if (nrow(stats) == 0) {
      return(NA_real_)
    }
    mean(stats$n_chars, na.rm = TRUE)
  }

  res$summary_metrics <- list(
    n_authors_total     = n_authors_total,
    n_only_studio       = n_only_studio,
    n_only_projects     = n_only_proj,
    n_both              = n_both,
    share_only_studio   = n_only_studio / n_authors_total,
    share_only_projects = n_only_proj   / n_authors_total,
    share_both          = n_both        / n_authors_total,
    mean_len_studio     = mean_length(studio_text_stats),
    mean_len_project    = mean_length(project_text_stats),
    # Ten most active authors in each source (tables are sorted descending)
    top_studio_authors  = head(studio_author_freq, 10),
    top_project_authors = head(project_author_freq, 10)
  )

  res
}
###################

# Вспомогательная: метаданные проекта по /projects/<id>
# Fetch the metadata record of one project from /projects/<id>.
#
# Arguments:
#   project_id - project identifier, pasted into the request URL.
#   verbose    - when TRUE, the URL is reported with message() first.
#
# Returns NULL for a non-200 status or an empty/NULL body; otherwise the
# parsed JSON.
get_project_metadata <- function(project_id, verbose = TRUE) {
  endpoint <- paste0("https://api.scratch.mit.edu/projects/", project_id)
  if (verbose) {
    message("GET ", endpoint)
  }

  response <- httr::GET(endpoint)
  if (httr::status_code(response) != 200) {
    return(NULL)
  }

  body <- httr::content(response, as = "text", encoding = "UTF-8")
  if (is.null(body) || identical(body, "")) {
    return(NULL)
  }

  jsonlite::fromJSON(body, flatten = TRUE)
}

# Ремикс-метрики для одной студии
# Remix metrics for a single studio.
#
# Arguments:
#   studio_id       - studio identifier, echoed in the result and messages.
#   studio_projects - data frame with a `project_id` column.
#   verbose         - when TRUE, progress messages are emitted.
#
# Returns NULL (with a warning) when there are no projects; otherwise a
# one-row tibble with the project count, the number of remixes (any, with the
# parent inside the studio, with the root inside the studio) and the
# corresponding shares.
get_studio_remix_stats <- function(studio_id,
                                   studio_projects,
                                   verbose = TRUE) {
  if (is.null(studio_projects) || nrow(studio_projects) == 0) {
    warning("No projects for studio ", studio_id)
    return(NULL)
  }

  # Unique project ids; also the membership set for the "inside the studio"
  # checks below.
  member_ids <- unique(studio_projects$project_id)

  if (verbose) {
    message("Studio ", studio_id, ": ", length(member_ids), " projects")
  }

  # One row per project with its remix parent and root. Both stay NA when the
  # metadata or its `remix` entry is missing.
  lookup_remix <- function(pid) {
    meta <- get_project_metadata(pid, verbose = verbose)
    parent_id <- NA_integer_
    root_id <- NA_integer_

    if (!is.null(meta) && !is.null(meta$remix)) {
      if (!is.null(meta$remix$parent)) parent_id <- meta$remix$parent
      if (!is.null(meta$remix$root)) root_id <- meta$remix$root
    }

    tibble(
      project_id   = pid,
      remix_parent = parent_id,
      remix_root   = root_id
    )
  }

  remix_df <- bind_rows(purrr::map(member_ids, lookup_remix))
  n_projects <- nrow(remix_df)

  remix_df <- mutate(
    remix_df,
    is_remix         = !is.na(remix_parent) | !is.na(remix_root),
    parent_in_studio = !is.na(remix_parent) & remix_parent %in% member_ids,
    root_in_studio   = !is.na(remix_root)   & remix_root   %in% member_ids
  )

  n_remix_total           <- sum(remix_df$is_remix, na.rm = TRUE)
  n_remix_strict_internal <- sum(remix_df$parent_in_studio, na.rm = TRUE)
  n_remix_broad_internal  <- sum(remix_df$root_in_studio, na.rm = TRUE)

  # Share of the studio's projects; NA when there is nothing to divide by.
  share_of <- function(k) {
    if (n_projects > 0) k / n_projects else NA_real_
  }

  tibble(
    studio_id                   = studio_id,
    n_projects                  = n_projects,
    n_remix_total               = n_remix_total,
    n_remix_strict_internal     = n_remix_strict_internal,
    n_remix_broad_internal      = n_remix_broad_internal,
    share_remix_total           = share_of(n_remix_total),
    share_remix_strict_internal = share_of(n_remix_strict_internal),
    share_remix_broad_internal  = share_of(n_remix_broad_internal)
  )
}


####-- 
# Fetch a studio's projects and compute its remix metrics.
#
# Arguments:
#   studio_id - studio identifier.
#   lang      - label shown in the progress banner only.
#   verbose   - forwarded to the fetch helpers.
#
# Returns NULL (with a warning) when the studio has no projects; otherwise the
# result of get_studio_remix_stats() without its `studio_id` column.
analyze_remix_for_one <- function(studio_id, lang = NA, verbose = TRUE) {
  message("=== Studio ", studio_id, " (", lang, ") ===")

  projects <- get_studio_projects(studio_id, verbose = verbose)
  if (is.null(projects) || nrow(projects) == 0) {
    warning("No projects for studio ", studio_id)
    return(NULL)
  }

  stats <- get_studio_remix_stats(
    studio_id       = studio_id,
    studio_projects = projects,
    verbose         = verbose
  )

  # Drop the inner studio_id column from the result.
  dplyr::select(stats, -studio_id)
}


Scratch_Api_team_analysis.R

library(scales)
library(fmsb)

source("Scratch_Api_core.R", encoding = "UTF-8")


#################
# Studios compared in this recipe, one row per studio with its language tag.
studios_to_analyze <- tibble::tribble(
  ~studio_id, ~lang,
  32968123,   "en",
  51122357,   "de",
  1865715,    "fr",
  29547282,   "ru",
  294100,     "ja"
)


##################


# 2.1. Проекты студий
studio_projects_list <- studios_to_analyze |>
  mutate(
    studio_projects = map(
      studio_id,
      ~ get_studio_projects(.x, verbose = TRUE)
    )
  )

# 2.2. Анализ комментариев для каждой студии
studio_comments_analysis <- studio_projects_list |>
  mutate(
    comments_analysis = map2(
      studio_id, studio_projects,
      ~ analyze_studio_comments(
        studio_id       = .x,
        studio_projects = .y,
        verbose         = TRUE
      )
    )
  )

# 2.3. Сводные метрики по комментариям (studio_metrics)
studio_metrics <- studio_comments_analysis |>
  mutate(
    summary_metrics = map(comments_analysis, ~ .x$summary_metrics)
  ) |>
  unnest_wider(summary_metrics)

studio_metrics

#######################################
# Предполагаем, что функция analyze_remix_for_one уже определена в Scratch_Api_core.R

# Remix metrics for every studio: one nested result per row, then unnested
# into regular columns.
studio_remix_metrics <- unnest(
  mutate(
    studios_to_analyze,
    remix_metrics = map2(
      studio_id, lang,
      \(sid, lang_tag) analyze_remix_for_one(
        studio_id = sid,
        lang      = lang_tag,
        verbose   = TRUE
      )
    )
  ),
  cols = remix_metrics
)

studio_remix_metrics

############################


# Teamwork profile: comment metrics joined with remix metrics, one row per
# studio.
studio_team_profile <- studio_metrics |>
  inner_join(studio_remix_metrics, by = c("studio_id", "lang")) |>
  mutate(
    remix_share_total    = n_remix_total / n_projects,
    remix_share_internal = n_remix_strict_internal / n_projects
  ) |>
  select(
    studio_id,
    lang,
    share_both,
    share_only_projects,
    mean_len_studio,
    mean_len_project,
    remix_share_total,
    remix_share_internal
  )

studio_team_profile



##################
# Берём профиль командности
# Radar-chart input built from the teamwork profile. Shares are used as they
# are; mean comment lengths are passed through rescale().
team_radar <- transmute(
  studio_team_profile,
  studio_id,
  lang,
  metric_A_both     = share_both,
  metric_A_onlyproj = share_only_projects,
  metric_L_studio   = rescale(mean_len_studio),
  metric_L_project  = rescale(mean_len_project),
  metric_R_total    = remix_share_total,
  metric_R_internal = remix_share_internal
)

# Пример: строим radar для ru-студии


# Example: radar chart for the "ru" studio.
# The metrics are converted to a plain data.frame: row names are assigned
# below, and setting row names on a tibble is deprecated.
ru_row <- team_radar |>
  filter(lang == "ru") |>
  select(-studio_id, -lang) |>
  as.data.frame()

# The first two rows passed to radarchart() hold the axis maximum (1) and
# minimum (0) for every metric.
ru_bounds <- as.data.frame(
  rbind(rep(1, ncol(ru_row)), rep(0, ncol(ru_row)))
)
names(ru_bounds) <- names(ru_row)

radar_data_ru <- rbind(ru_bounds, ru_row)
rownames(radar_data_ru) <- c("max", "min", "ru")

radarchart(
  radar_data_ru,
  axistype  = 1,
  pcol      = "red",
  pfcol     = scales::alpha("red", 0.3),
  plwd      = 2,
  cglcol    = "grey",
  cglty     = 1,
  axislabcol = "grey",
  vlcex     = 0.8,
  title     = "Профиль командности студии (ru)"
)


 ##############


# Вспомогательная функция: построить радар для одной студии по lang
# Draw the teamwork radar chart of one studio.
#
# Arguments:
#   team_radar - data frame with `studio_id`, `lang` and the metric columns;
#                every other column becomes one radar axis.
#   lang_code  - value of `lang` selecting the studio to plot.
#
# Stops with an error unless exactly one row matches `lang_code`.
# Called for its plotting side effect.
plot_team_radar <- function(team_radar, lang_code) {
  # A plain data.frame is used because row names are assigned below, and
  # setting row names on a tibble is deprecated.
  row <- team_radar |>
    filter(lang == lang_code) |>
    select(-studio_id, -lang) |>
    as.data.frame()

  if (nrow(row) != 1) {
    stop(
      "Expected exactly one studio with lang = '", lang_code,
      "', found ", nrow(row),
      call. = FALSE
    )
  }

  # The first two rows passed to radarchart() hold the axis maximum (1) and
  # minimum (0) for every metric.
  bounds <- as.data.frame(rbind(rep(1, ncol(row)), rep(0, ncol(row))))
  names(bounds) <- names(row)

  radar_data <- rbind(bounds, row)
  rownames(radar_data) <- c("max", "min", lang_code)

  fmsb::radarchart(
    radar_data,
    axistype   = 1,
    pcol       = "steelblue",
    pfcol      = scales::alpha("steelblue", 0.3),
    plwd       = 2,
    cglcol     = "grey80",
    cglty      = 1,
    axislabcol = "grey30",
    vlcex      = 0.7,
    title      = paste("Командность студии:", lang_code)
  )
}

# Ставим сетку 2x3 (5 студий + 1 пустое место)
# Lay the radars out on a 2x3 grid (5 studios + 1 empty cell).
# par() returns the previous values of the parameters it changes; keeping
# them allows both `mfrow` and `mar` to be restored afterwards.
old_par <- par(mfrow = c(2, 3), mar = c(2, 2, 3, 2))

for (lg in team_radar$lang) {
  plot_team_radar(team_radar, lg)
}

par(old_par)  # restore the previous layout and margins