Spark&R: различия между версиями

Материал из Поле цифровой дидактики
Новая страница: «install.packages("sparklyr") install.packages("dplyr") library("sparklyr") library("dplyr") #-- packageVersion("sparklyr") #-- spark_available_versions() spark_install() sc <- spark_connect(master = "local") cars <- copy_to(sc, mtcars) cars spark_web(sc) count(cars) ### getwd() setwd("C:/Users/PatarakinED/Documents/R_experiments") ### select(cars, hp, mpg) %>% sample_n(100) %>% collect() %>% plot() model <- ml_linear_regression(cars, mpg ~ hp) mo...»
 
Нет описания правки
 
Строка 1: Строка 1:
<syntaxhighlight lang="R" line>
install.packages("sparklyr")
install.packages("sparklyr")
install.packages("dplyr")
install.packages("dplyr")
Строка 81: Строка 83:


########
########
</syntaxhighlight>
----
[[Категория:Lesson]]

Текущая версия от 12:37, 24 февраля 2026

# Setup ----
# Install the required packages only when they are missing, so re-running the
# script does not reinstall them every time.
if (!requireNamespace("sparklyr", quietly = TRUE)) {
  install.packages("sparklyr")
}
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

library(sparklyr)
library(dplyr)

# Diagnostics: the installed sparklyr version and the Spark versions that
# sparklyr offers for installation.
packageVersion("sparklyr")
spark_available_versions()

# Install Spark locally and open a connection to a local master.
spark_install()
sc <- spark_connect(master = "local")

# Load data ----
# Copy mtcars into Spark. overwrite = TRUE lets this section be re-run within
# the same Spark session without failing on an already existing table.
cars <- copy_to(sc, mtcars, overwrite = TRUE)
cars

# Open the Spark web interface for this connection.
spark_web(sc)

count(cars)

# Working directory ----
getwd()
# The path below is specific to the original author's machine; only switch to
# it when it actually exists so the script keeps running elsewhere.
work_dir <- "C:/Users/PatarakinED/Documents/R_experiments"
if (dir.exists(work_dir)) {
  setwd(work_dir)
}

# Explore ----
# Sample hp/mpg in Spark, bring the rows into R and scatter-plot them.
select(cars, hp, mpg) %>%
  sample_n(100) %>%
  collect() %>%
  plot()

# Model ----
# Linear regression of mpg on hp, fitted in Spark.
model <- ml_linear_regression(cars, mpg ~ hp)
model

# Predict mpg for hp = 260, 270, ..., 350, combine the predictions with the
# observed hp/mpg pairs and plot everything together.
model %>%
  ml_predict(copy_to(sc, data.frame(hp = 250 + 10 * 1:10))) %>%
  transmute(hp = hp, mpg = prediction) %>%
  # Both sides have exactly the columns hp and mpg; naming them explicitly
  # matches the implicit natural join and avoids the "Joining with" message.
  full_join(select(cars, hp, mpg), by = c("hp", "mpg")) %>%
  collect() %>%
  plot()

## Write - read ----
# Write the Spark table out as CSV and read it back in.
# mode = "overwrite" replaces the output of a previous run instead of failing
# because the path already exists.
spark_write_csv(cars, "cars.csv", mode = "overwrite")
cars <- spark_read_csv(sc, "cars.csv")

## Dir ----
# Create the "input" directory only when it is missing, so re-running the
# script does not emit an "already exists" warning.
if (!dir.exists("input")) {
  dir.create("input")
}

# Show the most recent Spark log entries for this connection.
spark_log(sc)

## Analysis ----
# NOTE: the Spark connection is closed at the very end of this section.
# Every query on `cars` below needs a live connection, so disconnecting
# before them (as the script originally did) makes them fail.

library(corrr)
library(ggplot2)
library(dbplot)  # provides dbplot_raster()

# Column means, computed in Spark.
summarize_all(cars, mean)

# Show the SQL that the same summary translates to.
summarize_all(cars, mean) %>%
  show_query()

# Column means per transmission type (am == 0 -> automatic, otherwise manual).
cars %>%
  mutate(transmission = ifelse(am == 0, "automatic", "manual")) %>%
  group_by(transmission) %>%
  summarise_all(mean)

# Correlation matrix computed by Spark ML.
ml_corr(cars)

# Pairwise Pearson correlations via corrr; computed once and reused for both
# the printed table and the plot.
cars_corr <- correlate(cars, use = "pairwise.complete.obs", method = "pearson")
cars_corr

cars_corr %>%
  shave() %>%
  rplot()

# Local ggplot2 plots built from the in-memory mtcars data frame.
ggplot(mtcars, aes(as.factor(cyl), mpg)) +
  geom_col()

ggplot(mtcars, aes(mpg, wt)) +
  geom_point()

# Raster plot of mpg vs wt, drawn from the Spark table.
dbplot_raster(cars, mpg, wt, resolution = 16)

## Clean up ----
# Close this connection, then any other Spark connections still open.
spark_disconnect(sc)
spark_disconnect_all()

########