# Spark & R
# Материал из Поле цифровой дидактики
# Setup ----
# Install the required packages only when they are missing, so that re-running
# the script does not download and reinstall them every time.
for (pkg in c("sparklyr", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library("sparklyr")
library("dplyr")
#--
# Report which sparklyr version is in use.
packageVersion("sparklyr")
#--
# List the Spark versions sparklyr can install, then install Spark locally
# (no version is passed, so sparklyr chooses which one).
spark_available_versions()
spark_install()
# Connect and load data ----
# Open a connection to a local Spark instance.
sc <- spark_connect(master = "local")
# Copy the built-in mtcars data frame into Spark; `cars` is the remote table.
cars <- sc %>%
  copy_to(mtcars)
cars
# Open the Spark web interface for this connection.
sc %>%
  spark_web()
# Count the rows of the Spark table.
cars %>%
  count()
###
# Show the current working directory.
getwd()
# Switch to the author's experiments folder when it exists on this machine.
# The path is machine-specific, so on any other machine the script keeps the
# current working directory (and says so) instead of aborting.
work_dir <- "C:/Users/PatarakinED/Documents/R_experiments"
if (dir.exists(work_dir)) {
  setwd(work_dir)
} else {
  message("Directory not found, keeping working directory: ", getwd())
}
###
# Take the hp and mpg columns from the Spark table, sample rows from them,
# bring the result into R, and draw a scatter plot.
cars %>%
  select(hp, mpg) %>%
  sample_n(100) %>%
  collect() %>%
  plot()
# Fit a linear regression of mpg on hp in Spark and print the fitted model.
model <- cars %>%
  ml_linear_regression(mpg ~ hp)
model

# Predict mpg for hp = 260, 270, ..., 350 (250 + 10 * 1:10). The data frame is
# built inline inside copy_to() on purpose: the call expression influences the
# name of the Spark table that gets created.
# The predictions are renamed to mpg, combined with the observed hp/mpg pairs,
# collected into R, and plotted together.
ml_predict(model, copy_to(sc, data.frame(hp = 250 + 10 * 1:10))) %>%
  transmute(hp = hp, mpg = prediction) %>%
  full_join(select(cars, hp, mpg)) %>%
  collect() %>%
  plot()
## write - read
# Write the Spark table out as CSV. mode = "overwrite" replaces output left by
# an earlier run; without it the write fails once "cars.csv" already exists.
spark_write_csv(cars, "cars.csv", mode = "overwrite")
# Read the CSV back into Spark; from here on `cars` refers to the re-read table.
cars <- spark_read_csv(sc, "cars.csv")
##
## Dir
# Create the "input" directory unless it is already there (dir.create() warns
# when the directory exists).
if (!dir.exists("input")) {
  dir.create("input")
}
# Show the Spark log for this connection.
spark_log(sc)
##-------------
# Analysis ----
# Every step below that uses `cars` runs through the Spark connection `sc`, so
# the connection has to stay open until they have finished. The disconnect
# calls therefore come last in this section rather than first.

# Mean of every column.
summarize_all(cars, mean)
##
# The same summary, but show the query generated for it instead of the result.
summarize_all(cars, mean) %>%
  show_query()

# Column means per transmission type, derived from the `am` flag.
cars %>%
  mutate(transmission = ifelse(am == 0, "automatic", "manual")) %>%
  group_by(transmission) %>%
  summarise_all(mean)

# Correlations computed by Spark.
ml_corr(cars)

# Pairwise Pearson correlations via corrr.
library(corrr)
correlate(cars, use = "pairwise.complete.obs", method = "pearson")
###
# The same correlations, passed through shave() and plotted with rplot().
correlate(cars, use = "pairwise.complete.obs", method = "pearson") %>%
  shave() %>%
  rplot()

# These two plots use the local mtcars data frame, not the Spark table.
library(ggplot2)
ggplot(aes(as.factor(cyl), mpg), data = mtcars) + geom_col()
ggplot(aes(mpg, wt), data = mtcars) +
  geom_point()

# Raster plot of mpg against wt from the Spark table. The function is called
# with an explicit dbplot:: prefix because this script never attaches dbplot.
dbplot::dbplot_raster(cars, mpg, wt, resolution = 16)

# Cleanup: close this connection, then any other Spark connections still open.
spark_disconnect(sc)
spark_disconnect_all()
########
