library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(knitr)
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(tidycensus)
# Load the candy data set from the working directory. The first row of the
# file supplies the column names (read_csv's default behaviour).
candy <- read_csv("candydata.csv")
## Rows: 85 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): competitorname
## dbl (12): chocolate, fruity, caramel, peanutyalmondy, nougat, crispedricewaf...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Select
# Keep only chocolate, caramel and sugarpercent, then order the rows from the
# highest sugarpercent to the lowest.
candy %>%
  select(chocolate, caramel, sugarpercent) %>%
  arrange(desc(sugarpercent))
## # A tibble: 85 × 3
## chocolate caramel sugarpercent
## <dbl> <dbl> <dbl>
## 1 1 0 0.988
## 2 1 1 0.965
## 3 0 1 0.965
## 4 0 0 0.941
## 5 0 0 0.941
## 6 0 0 0.906
## 7 0 0 0.906
## 8 0 0 0.906
## 9 0 0 0.872
## 10 0 0 0.872
## # ℹ 75 more rows
Arrange
# Same three columns, ordered from the lowest sugarpercent to the highest
# (arrange() sorts ascending unless the column is wrapped in desc()).
candy %>%
  select(chocolate, caramel, sugarpercent) %>%
  arrange(sugarpercent)
## # A tibble: 85 × 3
## chocolate caramel sugarpercent
## <dbl> <dbl> <dbl>
## 1 0 0 0.011
## 2 0 0 0.011
## 3 1 0 0.0340
## 4 0 0 0.046
## 5 0 0 0.046
## 6 0 0 0.0690
## 7 0 0 0.0690
## 8 0 0 0.0930
## 9 0 0 0.0930
## 10 0 0 0.0930
## # ℹ 75 more rows
## default is ascending
Filter
How many are chocolate?
# Rows where chocolate equals 1, reduced to three columns; the header of the
# printed tibble reports how many rows matched.
candy %>%
  filter(chocolate == 1) %>%
  select(chocolate, caramel, sugarpercent)
How many are chocolate and caramel?
# Rows where both chocolate and caramel equal 1, reduced to three columns.
candy %>%
  filter(chocolate == 1 & caramel == 1) %>%
  select(chocolate, caramel, sugarpercent)
## # A tibble: 10 × 3
## chocolate caramel sugarpercent
## <dbl> <dbl> <dbl>
## 1 1 1 0.732
## 2 1 1 0.604
## 3 1 1 0.302
## 4 1 1 0.604
## 5 1 1 0.732
## 6 1 1 0.965
## 7 1 1 0.860
## 8 1 1 0.546
## 9 1 1 0.604
## 10 1 1 0.546
How many are chocolate or caramel?
# Rows where at least one of chocolate or caramel equals 1, reduced to three
# columns.
candy %>%
  filter(chocolate == 1 | caramel == 1) %>%
  select(chocolate, caramel, sugarpercent)
## # A tibble: 41 × 3
## chocolate caramel sugarpercent
## <dbl> <dbl> <dbl>
## 1 1 1 0.732
## 2 1 0 0.604
## 3 1 0 0.465
## 4 1 1 0.604
## 5 0 1 0.604
## 6 1 0 0.604
## 7 1 0 0.127
## 8 1 0 0.430
## 9 1 0 0.430
## 10 1 0 0.430
## # ℹ 31 more rows
Combine commands - connect with %>%
Is candy that is pluribus more expensive? No!
# Compare the two pluribus groups on price: one row per pluribus value with
# the number of candies (count) and the mean of pricepercent (mean).
candy %>%
  group_by(pluribus) %>%
  summarise(
    count = n(),
    mean = mean(pricepercent)
  )
## # A tibble: 2 × 3
## pluribus count mean
## <dbl> <int> <dbl>
## 1 0 41 0.534
## 2 1 44 0.408
Is caramel more expensive? Yes!
# One row per caramel value: number of candies (count) and the mean of
# pricepercent (mean).
candy %>%
  group_by(caramel) %>%
  summarise(
    count = n(),
    mean = mean(pricepercent)
  )
## # A tibble: 2 × 3
## caramel count mean
## <dbl> <int> <dbl>
## 1 0 71 0.437
## 2 1 14 0.632
Is there more chocolate or non-chocolate? Non-Chocolate
# Number of candies for each chocolate value; the tally column is named
# "count".
candy %>%
  count(chocolate, name = "count")
## # A tibble: 2 × 2
## chocolate count
## <dbl> <int>
## 1 0 48
## 2 1 37
What is the average win percentage of fruity candy? 44.11%
# One row per fruity value: number of candies (count) and the mean of
# winpercent (mean).
candy %>%
  group_by(fruity) %>%
  summarise(
    count = n(),
    mean = mean(winpercent)
  )
## # A tibble: 2 × 3
## fruity count mean
## <dbl> <int> <dbl>
## 1 0 47 55.3
## 2 1 38 44.1
Basic plots: plot(candy$sugarpercent, candy$winpercent)
Fancier plots
# Scatter plot of winpercent against sugarpercent, one point per candy.
# chocolate is read in as a double holding 0/1, so mapping it directly yields
# a continuous colour gradient; wrapping it in factor() makes ggplot2 use two
# discrete colours instead. labs() keeps the legend title as "chocolate".
ggplot(
  data = candy,
  mapping = aes(
    x = sugarpercent,
    y = winpercent,
    color = factor(chocolate)
  )
) +
  geom_point() +
  labs(color = "chocolate")