R Under development (unstable) (2024-07-30 r86939 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(dplyr) Attaching package: 'dplyr' The following objects are masked from 'package:stats': filter, lag The following objects are masked from 'package:base': intersect, setdiff, setequal, union > library(rollup) Loading required package: tidyr Attaching package: 'rollup' The following objects are masked from 'package:dplyr': summarise, summarize > > data("web_service_data") > web_service_data %>% head # A tibble: 6 × 6 date_id id gender age page_view_cnt product_view_cnt_cat 1 2024-06-24 19 M 40 0 60% 2 2024-06-24 34 M 40 5 70% 3 2024-06-24 44 F 50 12 100% 4 2024-06-24 57 M 60 87 20% 5 2024-06-24 65 F 50 1 100% 6 2024-06-24 86 F 40 3 90% > > # avg_pv_cnt group by (gender, age, (gender, age)) > web_service_data %>% filter(date_id == '2024-06-30') %>% + group_by(gender, age) %>% grouping_sets('gender', 'age', c('gender','age')) %>% + summarize(avg_pv_cnt = mean(page_view_cnt)) # A tibble: 22 × 3 gender age avg_pv_cnt 1 F 2.28 2 M 1.92 3 N 1.63 4 10 1.62 5 20 3.01 6 30 2.23 7 40 1.77 8 50 1.44 9 60 2.30 10 F 10 2.33 # ℹ 12 more rows > > web_service_data %>% filter(date_id == '2024-06-30') %>% + group_by(gender, age) %>% grouping_sets('gender', 'age', c('gender','age')) An object of class "grouped_df_list" Slot "df_list": [[1]] # A tibble: 1,000 × 6 # Groups: gender [3] date_id id gender age page_view_cnt product_view_cnt_cat 1 2024-06-30 2 F 20 7 70% 2 2024-06-30 45 M 40 1 20% 3 2024-06-30 46 F 30 4 80% 4 2024-06-30 71 M 10 0 X 5 2024-06-30 77 M 40 3 90% 6 2024-06-30 91 M 20 1 50% 7 2024-06-30 94 F 60 1 20% 8 2024-06-30 96 F 60 2 50% 9 2024-06-30 105 F 40 0 60% 10 2024-06-30 132 M 60 1 X # ℹ 990 more rows [[2]] # A tibble: 1,000 × 6 # Groups: age [6] date_id id gender age page_view_cnt product_view_cnt_cat 1 2024-06-30 2 F 20 7 70% 2 2024-06-30 45 M 40 1 20% 3 2024-06-30 46 F 30 4 80% 4 2024-06-30 71 M 10 0 X 5 2024-06-30 77 M 40 3 90% 6 2024-06-30 91 M 20 1 50% 7 2024-06-30 94 F 60 1 20% 8 2024-06-30 96 F 60 2 50% 9 2024-06-30 105 F 40 0 60% 10 2024-06-30 132 M 60 1 X # ℹ 990 more rows [[3]] # A tibble: 1,000 × 6 # Groups: gender, age [13] date_id id gender age page_view_cnt product_view_cnt_cat 1 2024-06-30 2 F 20 7 70% 2 2024-06-30 45 M 40 1 20% 3 2024-06-30 46 F 30 4 80% 4 2024-06-30 71 M 10 0 X 5 2024-06-30 77 M 40 3 90% 6 2024-06-30 91 M 20 1 50% 7 2024-06-30 94 F 60 1 20% 8 2024-06-30 96 F 60 2 50% 9 2024-06-30 105 F 40 0 60% 10 2024-06-30 132 M 60 1 X # ℹ 990 more rows > > # avg_pv_cnt group by ((gender, age, product_view_cnt_cat), product_view_cnt_cat) > web_service_data %>% filter(date_id == '2024-06-30') %>% + group_by(gender, age, product_view_cnt_cat) %>% grouping_sets('product_view_cnt_cat', c('product_view_cnt_cat', 'gender', 'age')) %>% + summarize(avg_pv_cnt = mean(page_view_cnt)) %>% pivot_wider(names_from = product_view_cnt_cat, values_from = avg_pv_cnt) # A tibble: 14 × 11 gender age X `20%` `40%` `50%` `60%` `70%` `80%` `90%` `100%` 1 1.50 1.80 2.05 2.31 2.72 2.89 2.8 3.79 2.82 2 F 10 1.4 2 1.4 2.67 4 NA NA 4 NA 3 F 20 0 3.5 2.08 2.29 3.83 2.57 3.45 4.83 2.25 4 F 30 0.833 2.5 4.5 2.88 3 1.75 3.5 3 3.17 5 F 40 1.33 1.9 2.7 2.2 1.22 3 3.38 4 2 6 F 50 0.462 1.5 2 2.5 1.2 4 2.5 5.33 3.5 7 F 60 1.19 1.71 1 1.33 3 3 1.5 2 3 8 M 10 0.375 0.833 1.14 3 1 0 NA NA NA 9 M 20 1.14 3.17 3.16 3.55 4.5 3 NA 3.5 7 10 M 30 0.824 1.62 1.31 2.7 3.38 2.5 1.86 3.5 NA 11 M 40 0.889 0.933 2.06 0.833 1.88 3.25 1.6 1.67 NA 12 M 50 0.562 1.07 1.06 2.6 2 0 0.5 0 NA 13 M 60 3.06 2.69 4 3.5 0 8 2 1 NA 14 N 10 1.63 1.11 6 NA NA NA NA NA NA > > # avg_pv_cnt group by (gender, age, (gender, age)) > web_service_data %>% filter(date_id == '2024-06-30') %>% + group_by(gender, age) %>% with_rollup() %>% + summarize(avg_pv_cnt = mean(page_view_cnt)) # A tibble: 17 × 3 # Groups: gender [4] gender age avg_pv_cnt 1 F 10 2.33 2 F 20 2.86 3 F 30 2.67 4 F 40 2.33 5 F 50 2.24 6 F 60 1.48 7 M 10 0.92 8 M 20 3.19 9 M 30 1.91 10 M 40 1.31 11 M 50 0.907 12 M 60 2.99 13 N 10 1.63 14 F 2.28 15 M 1.92 16 N 1.63 17 2.04 > > proc.time() user system elapsed 1.42 0.18 1.57