library(tidyverse) 
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr) 
library(ggplot2) 
library(knitr) 
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(tidycensus)
# Load the candy data set; the first row of the CSV supplies the column names
# (readr's default behaviour).
candy <- read_csv("candydata.csv")
## Rows: 85 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): competitorname
## dbl (12): chocolate, fruity, caramel, peanutyalmondy, nougat, crispedricewaf...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Select

# Keep only chocolate, caramel and sugarpercent, then list the rows from the
# highest sugarpercent to the lowest.
candy %>%
  select(chocolate, caramel, sugarpercent) %>%
  arrange(desc(sugarpercent))
## # A tibble: 85 × 3
##    chocolate caramel sugarpercent
##        <dbl>   <dbl>        <dbl>
##  1         1       0        0.988
##  2         1       1        0.965
##  3         0       1        0.965
##  4         0       0        0.941
##  5         0       0        0.941
##  6         0       0        0.906
##  7         0       0        0.906
##  8         0       0        0.906
##  9         0       0        0.872
## 10         0       0        0.872
## # ℹ 75 more rows

Arrange

  # Same three columns, sorted by sugarpercent from lowest to highest.
  candy %>%
    select(chocolate, caramel, sugarpercent) %>%
    arrange(sugarpercent)
## # A tibble: 85 × 3
##    chocolate caramel sugarpercent
##        <dbl>   <dbl>        <dbl>
##  1         0       0       0.011 
##  2         0       0       0.011 
##  3         1       0       0.0340
##  4         0       0       0.046 
##  5         0       0       0.046 
##  6         0       0       0.0690
##  7         0       0       0.0690
##  8         0       0       0.0930
##  9         0       0       0.0930
## 10         0       0       0.0930
## # ℹ 75 more rows
## arrange() sorts in ascending order by default

Filter

How many chocolate?

# Keep only the rows where chocolate is 1; the header of the printed tibble
# reports how many rows remain.
candy %>%
  select(chocolate, caramel, sugarpercent) %>%
  filter(chocolate == 1)

How many are chocolate and caramel?

  # Keep only the rows flagged as both chocolate and caramel.
  candy %>%
    select(chocolate, caramel, sugarpercent) %>%
    filter(chocolate == 1 & caramel == 1)
## # A tibble: 10 × 3
##    chocolate caramel sugarpercent
##        <dbl>   <dbl>        <dbl>
##  1         1       1        0.732
##  2         1       1        0.604
##  3         1       1        0.302
##  4         1       1        0.604
##  5         1       1        0.732
##  6         1       1        0.965
##  7         1       1        0.860
##  8         1       1        0.546
##  9         1       1        0.604
## 10         1       1        0.546

How many are chocolate or caramel?

  # Keep the rows flagged as chocolate, caramel, or both.
  candy %>%
    select(chocolate, caramel, sugarpercent) %>%
    filter(chocolate == 1 | caramel == 1)
## # A tibble: 41 × 3
##    chocolate caramel sugarpercent
##        <dbl>   <dbl>        <dbl>
##  1         1       1        0.732
##  2         1       0        0.604
##  3         1       0        0.465
##  4         1       1        0.604
##  5         0       1        0.604
##  6         1       0        0.604
##  7         1       0        0.127
##  8         1       0        0.430
##  9         1       0        0.430
## 10         1       0        0.430
## # ℹ 31 more rows

Combine commands - connect with %>%

Is candy that is pluribus more expensive?

# Row count and mean pricepercent for each value of pluribus.
candy %>%
  group_by(pluribus) %>%
  summarise(count = n(), mean = mean(pricepercent))

# Group by the pluribus flag, then report the group size and the average
# pricepercent of each group.
candy %>%
  group_by(pluribus) %>%
  summarise(
    count = n(),
    mean = mean(pricepercent)
  )
## # A tibble: 2 × 3
##   pluribus count  mean
##      <dbl> <int> <dbl>
## 1        0    41 0.534
## 2        1    44 0.408

Practice

Is caramel more expensive? Yes!

# Compare caramel (1) with non-caramel (0) candy: number of rows and mean
# pricepercent per group.
candy %>%
  group_by(caramel) %>%
  summarise(
    count = n(),
    mean = mean(pricepercent)
  )
## # A tibble: 2 × 3
##   caramel count  mean
##     <dbl> <int> <dbl>
## 1       0    71 0.437
## 2       1    14 0.632

Is there more chocolate or non-chocolate? Non-Chocolate

# Tally the rows for each value of chocolate; the tally column is named
# `count`.
candy %>%
  count(chocolate, name = "count")
## # A tibble: 2 × 2
##   chocolate count
##       <dbl> <int>
## 1         0    48
## 2         1    37

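What is the winning-est fruit candy? The table below reports the mean winpercent per group rather than a single candy: fruity candy averages 44.1, versus 55.3 for non-fruity candy.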

# Number of rows and mean winpercent for fruity (1) versus non-fruity (0)
# candy.
candy %>%
  group_by(fruity) %>%
  summarise(
    count = n(),
    mean = mean(winpercent)
  )
## # A tibble: 2 × 3
##   fruity count  mean
##    <dbl> <int> <dbl>
## 1      0    47  55.3
## 2      1    38  44.1

Plotting

Basic plots: plot(candy$sugarpercent, candy$winpercent)

Fancier plots

  # Scatter plot of winpercent against sugarpercent, one point per candy.
  # chocolate is stored as a 0/1 double, so mapping it directly to colour
  # yields a continuous gradient legend. Converting it to a factor gives two
  # distinct colours, one per group; labs() keeps the legend title readable.
  ggplot(
    data = candy,
    mapping = aes(x = sugarpercent, y = winpercent, color = factor(chocolate))
  ) +
    geom_point() +
    labs(color = "chocolate")