R Under development (unstable) (2024-07-30 r86939 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(dplyr)

Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

> library(rollup)
Loading required package: tidyr

Attaching package: 'rollup'

The following objects are masked from 'package:dplyr':

    summarise, summarize

> 
> data("web_service_data")
> web_service_data %>% head
# A tibble: 6 × 6
  date_id       id gender age   page_view_cnt product_view_cnt_cat
  <chr>      <dbl> <chr>  <fct>         <dbl> <fct>               
1 2024-06-24    19 M      40                0 60%                 
2 2024-06-24    34 M      40                5 70%                 
3 2024-06-24    44 F      50               12 100%                
4 2024-06-24    57 M      60               87 20%                 
5 2024-06-24    65 F      50                1 100%                
6 2024-06-24    86 F      40                3 90%                 
> 
> # avg_pv_cnt group by (gender, age, (gender, age))
> web_service_data %>% filter(date_id == '2024-06-30') %>% 
+   group_by(gender, age) %>% grouping_sets('gender', 'age', c('gender','age')) %>% 
+   summarize(avg_pv_cnt = mean(page_view_cnt))
# A tibble: 22 × 3
   gender age   avg_pv_cnt
   <chr>  <fct>      <dbl>
 1 F      <NA>        2.28
 2 M      <NA>        1.92
 3 N      <NA>        1.63
 4 <NA>   10          1.62
 5 <NA>   20          3.01
 6 <NA>   30          2.23
 7 <NA>   40          1.77
 8 <NA>   50          1.44
 9 <NA>   60          2.30
10 F      10          2.33
# ℹ 12 more rows
> 
> web_service_data %>% filter(date_id == '2024-06-30') %>% 
+   group_by(gender, age) %>% grouping_sets('gender', 'age', c('gender','age'))
An object of class "grouped_df_list"
Slot "df_list":
[[1]]
# A tibble: 1,000 × 6
# Groups:   gender [3]
   date_id       id gender age   page_view_cnt product_view_cnt_cat
   <chr>      <dbl> <chr>  <fct>         <dbl> <fct>               
 1 2024-06-30     2 F      20                7 70%                 
 2 2024-06-30    45 M      40                1 20%                 
 3 2024-06-30    46 F      30                4 80%                 
 4 2024-06-30    71 M      10                0 X                   
 5 2024-06-30    77 M      40                3 90%                 
 6 2024-06-30    91 M      20                1 50%                 
 7 2024-06-30    94 F      60                1 20%                 
 8 2024-06-30    96 F      60                2 50%                 
 9 2024-06-30   105 F      40                0 60%                 
10 2024-06-30   132 M      60                1 X                   
# ℹ 990 more rows

[[2]]
# A tibble: 1,000 × 6
# Groups:   age [6]
   date_id       id gender age   page_view_cnt product_view_cnt_cat
   <chr>      <dbl> <chr>  <fct>         <dbl> <fct>               
 1 2024-06-30     2 F      20                7 70%                 
 2 2024-06-30    45 M      40                1 20%                 
 3 2024-06-30    46 F      30                4 80%                 
 4 2024-06-30    71 M      10                0 X                   
 5 2024-06-30    77 M      40                3 90%                 
 6 2024-06-30    91 M      20                1 50%                 
 7 2024-06-30    94 F      60                1 20%                 
 8 2024-06-30    96 F      60                2 50%                 
 9 2024-06-30   105 F      40                0 60%                 
10 2024-06-30   132 M      60                1 X                   
# ℹ 990 more rows

[[3]]
# A tibble: 1,000 × 6
# Groups:   gender, age [13]
   date_id       id gender age   page_view_cnt product_view_cnt_cat
   <chr>      <dbl> <chr>  <fct>         <dbl> <fct>               
 1 2024-06-30     2 F      20                7 70%                 
 2 2024-06-30    45 M      40                1 20%                 
 3 2024-06-30    46 F      30                4 80%                 
 4 2024-06-30    71 M      10                0 X                   
 5 2024-06-30    77 M      40                3 90%                 
 6 2024-06-30    91 M      20                1 50%                 
 7 2024-06-30    94 F      60                1 20%                 
 8 2024-06-30    96 F      60                2 50%                 
 9 2024-06-30   105 F      40                0 60%                 
10 2024-06-30   132 M      60                1 X                   
# ℹ 990 more rows


> 
> # avg_pv_cnt group by ((gender, age, product_view_cnt_cat), product_view_cnt_cat)
> web_service_data %>% filter(date_id == '2024-06-30') %>% 
+   group_by(gender, age, product_view_cnt_cat) %>% grouping_sets('product_view_cnt_cat', c('product_view_cnt_cat', 'gender', 'age')) %>% 
+   summarize(avg_pv_cnt = mean(page_view_cnt)) %>% pivot_wider(names_from = product_view_cnt_cat, values_from = avg_pv_cnt)
# A tibble: 14 × 11
   gender age       X `20%` `40%`  `50%` `60%` `70%` `80%` `90%` `100%`
   <chr>  <fct> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
 1 <NA>   <NA>  1.50  1.80   2.05  2.31   2.72  2.89  2.8   3.79   2.82
 2 F      10    1.4   2      1.4   2.67   4    NA    NA     4     NA   
 3 F      20    0     3.5    2.08  2.29   3.83  2.57  3.45  4.83   2.25
 4 F      30    0.833 2.5    4.5   2.88   3     1.75  3.5   3      3.17
 5 F      40    1.33  1.9    2.7   2.2    1.22  3     3.38  4      2   
 6 F      50    0.462 1.5    2     2.5    1.2   4     2.5   5.33   3.5 
 7 F      60    1.19  1.71   1     1.33   3     3     1.5   2      3   
 8 M      10    0.375 0.833  1.14  3      1     0    NA    NA     NA   
 9 M      20    1.14  3.17   3.16  3.55   4.5   3    NA     3.5    7   
10 M      30    0.824 1.62   1.31  2.7    3.38  2.5   1.86  3.5   NA   
11 M      40    0.889 0.933  2.06  0.833  1.88  3.25  1.6   1.67  NA   
12 M      50    0.562 1.07   1.06  2.6    2     0     0.5   0     NA   
13 M      60    3.06  2.69   4     3.5    0     8     2     1     NA   
14 N      10    1.63  1.11   6    NA     NA    NA    NA    NA     NA   
> 
> # avg_pv_cnt with rollup: group by ((gender, age), gender, ()) -- per-gender subtotals plus a grand total
> web_service_data %>% filter(date_id == '2024-06-30') %>% 
+   group_by(gender, age) %>% with_rollup() %>% 
+   summarize(avg_pv_cnt = mean(page_view_cnt))
# A tibble: 17 × 3
# Groups:   gender [4]
   gender age   avg_pv_cnt
   <chr>  <fct>      <dbl>
 1 F      10         2.33 
 2 F      20         2.86 
 3 F      30         2.67 
 4 F      40         2.33 
 5 F      50         2.24 
 6 F      60         1.48 
 7 M      10         0.92 
 8 M      20         3.19 
 9 M      30         1.91 
10 M      40         1.31 
11 M      50         0.907
12 M      60         2.99 
13 N      10         1.63 
14 F      <NA>       2.28 
15 M      <NA>       1.92 
16 N      <NA>       1.63 
17 <NA>   <NA>       2.04 
> 
> proc.time()
   user  system elapsed 
   1.42    0.18    1.57