# Spark & R
# Install sparklyr and dplyr only when they are missing, so that re-running
# the script does not download them again, then attach both packages.
for (pkg in c("sparklyr", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library("sparklyr")
library("dplyr")
# ---
# Report which sparklyr release is installed.
utils::packageVersion(pkg = "sparklyr")
# ---
# List the Spark releases that sparklyr knows how to install.
spark_available_versions()

# Install Spark with sparklyr's default settings, then open a connection to a
# local master. `sc` is the connection handle used by the rest of the script.
spark_install()
sc <- spark_connect(master = "local")
# Copy the built-in mtcars data frame into Spark and print the resulting
# remote table reference.
cars <- copy_to(sc, mtcars)
cars

# Open the Spark web UI for this connection.
spark_web(sc)

# Row count, computed through the Spark table reference.
count(cars)
# Show the current working directory, then switch to the experiment folder.
# The folder is a machine-specific absolute path, so only change directory
# when it actually exists; otherwise warn and carry on in the current
# directory instead of aborting the whole script.
getwd()
work_dir <- "C:/Users/PatarakinED/Documents/R_experiments"
if (dir.exists(work_dir)) {
  setwd(work_dir)
} else {
  warning(
    "Working directory not changed; folder does not exist: ", work_dir,
    call. = FALSE
  )
}
# Scatter plot of horsepower against fuel economy: keep the two columns,
# sample rows, pull the result into local R memory, and plot it.
cars %>%
  select(hp, mpg) %>%
  sample_n(100) %>%
  collect() %>%
  plot()
# Fit a linear regression of mpg on hp, then print the fitted model.
model <- ml_linear_regression(cars, mpg ~ hp)
model

# Predict mpg for hp values 260, 270, ..., 350, combine the predictions with
# the observed (hp, mpg) pairs, and plot everything together.
model %>%
  ml_predict(copy_to(sc, data.frame(hp = 250 + 10 * 1:10))) %>%
  transmute(hp = hp, mpg = prediction) %>%
  # Both tables have exactly the columns hp and mpg; naming the keys keeps
  # the join explicit instead of relying on an implicit natural join.
  full_join(select(cars, hp, mpg), by = c("hp", "mpg")) %>%
  collect() %>%
  plot()
# Write / read
# Write the Spark table out as CSV, then read it back, replacing `cars` with
# the table loaded from that CSV.
# NOTE(review): re-running this may fail if "cars.csv" already exists,
# depending on Spark's save mode; confirm whether `mode = "overwrite"` is wanted.
spark_write_csv(cars, "cars.csv")
cars <- spark_read_csv(sc, "cars.csv")
# Dir
# Create an "input" folder under the current working directory.
dir.create(path = "input")

# Show the Spark log for the active connection.
sc %>%
  spark_log()
# -------------
# Close this script's connection, then close any other Spark connections
# that are still open.
spark_disconnect(sc)
spark_disconnect_all()
# The statements below query `cars` through `sc`. If the connection has
# already been closed earlier in the script, reconnect and reload mtcars so
# the analysis can run top to bottom.
if (!connection_is_open(sc)) {
  sc <- spark_connect(master = "local")
  cars <- copy_to(sc, mtcars)
}

# Mean of every column.
summarize_all(cars, mean)

# Show the SQL that the summary above is translated into.
summarize_all(cars, mean) %>%
  show_query()

# Column means split by transmission type (am == 0 is labelled "automatic").
cars %>%
  mutate(transmission = ifelse(am == 0, "automatic", "manual")) %>%
  group_by(transmission) %>%
  summarise_all(mean)
# Correlation matrix computed by Spark.
ml_corr(cars)

# The same pairwise Pearson correlations via corrr. Compute them once and
# reuse the result for both the printed table and the plot, rather than
# running the identical query twice.
library(corrr)
cars_cor <- correlate(cars, use = "pairwise.complete.obs", method = "pearson")
cars_cor

# Plot the shaved (triangular) correlation table.
cars_cor %>%
  shave() %>%
  rplot()
# Both charts below use the local mtcars data frame, not the Spark table.
library(ggplot2)

# Column chart of mpg by number of cylinders.
ggplot(mtcars, aes(as.factor(cyl), mpg)) +
  geom_col()

# Scatter plot of mpg against weight.
ggplot(mtcars, aes(mpg, wt)) +
  geom_point()
# dbplot_raster() is provided by the dbplot package, which nothing earlier in
# the script attaches, so load it here before use.
library(dbplot)

# Raster plot of mpg against wt from the `cars` table, using resolution = 16.
dbplot_raster(cars, mpg, wt, resolution = 16)
