SlideShare a Scribd company logo
1 of 31
Download to read offline
dplyr 
@romain_francois
• Use R since 2002 
• R Enthusiast 
• R/C++ hero 
• Performance 
• dplyr 
• Occasional comedy
Nov 26th
dplyr 
dplyr 
dplyr dplyr 
dplyr dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr dplyr 
dplyr 
dplyr 
dplyr dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr #rcatladies 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr dplyr 
dplyr 
dplyr 
dplyr 
dplyr 
dplyr dplyr
verb 
function that takes a 
data frame as its first 
argument
Examples of R verbs 
head, tail, … 
verb subject … 
> head( iris, n = 4 ) 
Sepal.Length Sepal.Width Petal.Length Petal.Width Species 
1 5.1 3.5 1.4 0.2 setosa 
2 4.9 3.0 1.4 0.2 setosa 
3 4.7 3.2 1.3 0.2 setosa 
4 4.6 3.1 1.5 0.2 setosa
%>% 
from magrittr
enjoy(cool(bake(shape(beat(append(bowl(rep("flour", 2), 
"yeast", "water", "milk", "oil"), "flour", until = 
"soft"), duration = "3mins"), as = "balls", style = 
"slightly-flat"), degrees = 200, duration = "15mins"), 
duration = "5mins")) 
bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>% 
append("flour", until = "soft") %>% 
beat(duration = "3mins") %>% 
shape(as = "balls", style = "slightly-flat") %>% 
bake(degrees = 200, duration = "15mins") %>% 
cool(buns, duration = "5mins") %>% 
enjoy()
enjoy(cool(bake(shape(beat(append(bowl(rep("flour", 2), 
"yeast", "water", "milk", "oil"), "flour", until = 
"soft"), duration = "3mins"), as = "balls", style = 
"slightly-flat"), degrees = 200, duration = "15mins"), 
duration = "5mins")) 
bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>% 
append("flour", until = "soft") %>% 
beat(duration = "3mins") %>% 
shape(as = "balls", style = "slightly-flat") %>% 
bake(degrees = 200, duration = "15mins") %>% 
cool(buns, duration = "5mins") %>% 
enjoy()
nycflights13: Data about flights departing NYC in 2013
> library("nycflights13") 
> flights 
Source: local data frame [336,776 x 16] 
year month day dep_time dep_delay arr_time arr_delay carrier tailnum flight 
1 2013 1 1 517 2 830 11 UA N14228 1545 
2 2013 1 1 533 4 850 20 UA N24211 1714 
3 2013 1 1 542 2 923 33 AA N619AA 1141 
4 2013 1 1 544 -1 1004 -18 B6 N804JB 725 
5 2013 1 1 554 -6 812 -25 DL N668DN 461 
6 2013 1 1 554 -4 740 12 UA N39463 1696 
7 2013 1 1 555 -5 913 19 B6 N516JB 507 
8 2013 1 1 557 -3 709 -14 EV N829AS 5708 
9 2013 1 1 557 -3 838 -8 B6 N593JB 79 
10 2013 1 1 558 -2 753 8 AA N3ALAA 301 
.. ... ... ... ... ... ... ... ... ... ... 
Variables not shown: origin (chr), dest (chr), air_time (dbl), distance (dbl), 
hour (dbl), minute (dbl)
tbl_df 
A data frame that does not print all of itself by default 
> data <- tbl_df(mtcars) 
> data 
Source: local data frame [32 x 11] 
mpg cyl disp hp drat wt qsec vs am gear carb 
1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 
2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 
3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 
4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 
5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 
6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 
7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 
8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 
9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 
10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 
.. ... ... ... ... ... ... ... .. .. ... ...
filter 
A subset of the rows of the data frame
filter(flights, month == 1, day == 1) 
flights %>% 
filter( dep_delay < 10 ) 
flights %>% 
filter( arr_delay < dep_delay ) 
flights %>% 
filter( hour < 12, arr_delay <= 0 )
arrange reorder a data frame
flights %>% 
filter( hour < 8 ) %>% 
arrange( year, month, day ) 
flights %>% 
arrange( desc(dep_delay) )
select 
select certain columns from the data frame
# Select columns by name 
select(flights, year, month, day) 
# Select all columns between year and day 
select(flights, year:day) 
# Select all columns except those from year to 
# day (inclusive) 
select(flights, -(year:day))
mutate 
modify or create columns based on others
d <- flights %>% 
mutate( 
gain = arr_delay - dep_delay, 
speed = distance / air_time * 60 
) %>% 
filter( gain > 0 ) %>% 
arrange( desc(speed) ) 
d %>% 
select( year, month, day, dest, gain, speed )
summarise 
collapse a data frame into one row …
summarise(flights, 
delay = mean(dep_delay, na.rm = TRUE)) 
flights %>% 
filter( dep_delay > 0 ) %>% 
summarise(arr_delay = mean(arr_delay, na.rm = TRUE))
group_by 
Group observations by one or more variables
flights %>% 
group_by( tailnum ) %>% 
summarise( 
count = n(), 
dist = mean(distance, na.rm = TRUE), 
delay = mean(arr_delay, na.rm = TRUE) 
) %>% 
filter( is.finite(delay) ) %>% 
arrange( desc(count) )
flights %>% 
group_by(dest) %>% 
summarise( 
planes = n_distinct(tailnum), 
flights = n() 
) %>% 
arrange( desc(flights) )
joins 
joining two data frames
inner_join 
all rows from x where there are matching 
values in y, and all columns from x and y. If there are multiple matches 
between x and y, all combinations of the matches are returned. 
destinations <- flights %>% 
group_by(dest) %>% 
summarise( 
planes = n_distinct(tailnum), 
flights = n() 
) %>% 
arrange( desc(flights) ) %>% 
rename( faa = dest ) 
inner_join( destinations, airports, by = "faa")
inner_join 
all rows from x where there are matching 
values in y, and all columns from x and y. If there are multiple matches 
between x and y, all combinations of the matches are returned. 
destinations <- flights %>% 
group_by(dest) %>% 
summarise( 
planes = n_distinct(tailnum), 
flights = n() 
) %>% 
arrange( desc(flights) ) 
inner_join( destinations, airports, 
by = c( "dest" = "faa" ) )
other joins 
See ?join 
• left_join, right_join 
• inner_join, full_join 
• semi_join 
• anti_join
dplyr %>% summary 
• Simple verbs: filter, mutate, select, summarise, 
arrange 
• Grouping with group_by 
• Joins with *_join 
• Convenient with %>% 
• F✈️ST
dplyr 
Romain François 
@romain_francois 
romain@r-enthusiasts.com

More Related Content

What's hot

Bytes in the Machine: Inside the CPython interpreter
Bytes in the Machine: Inside the CPython interpreterBytes in the Machine: Inside the CPython interpreter
Bytes in the Machine: Inside the CPython interpreterakaptur
 
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...akaptur
 
Exploring slides
Exploring slidesExploring slides
Exploring slidesakaptur
 
Are we ready to Go?
Are we ready to Go?Are we ready to Go?
Are we ready to Go?Adam Dudczak
 
2016 gunma.web games-and-asm.js
2016 gunma.web games-and-asm.js2016 gunma.web games-and-asm.js
2016 gunma.web games-and-asm.jsNoritada Shimizu
 
Diving into byte code optimization in python
Diving into byte code optimization in python Diving into byte code optimization in python
Diving into byte code optimization in python Chetan Giridhar
 
โปรแกรมย่อยและฟังชันก์มาตรฐาน
โปรแกรมย่อยและฟังชันก์มาตรฐานโปรแกรมย่อยและฟังชันก์มาตรฐาน
โปรแกรมย่อยและฟังชันก์มาตรฐานknang
 
Playing 44CON CTF for fun and profit
Playing 44CON CTF for fun and profitPlaying 44CON CTF for fun and profit
Playing 44CON CTF for fun and profit44CON
 
C c++-meetup-1nov2017-autofdo
C c++-meetup-1nov2017-autofdoC c++-meetup-1nov2017-autofdo
C c++-meetup-1nov2017-autofdoKim Phillips
 
Python opcodes
Python opcodesPython opcodes
Python opcodesalexgolec
 
Modern c++ Memory Management
Modern c++ Memory ManagementModern c++ Memory Management
Modern c++ Memory ManagementAlan Uthoff
 
TCO in Python via bytecode manipulation.
TCO in Python via bytecode manipulation.TCO in Python via bytecode manipulation.
TCO in Python via bytecode manipulation.lnikolaeva
 
Data Structure - 2nd Study
Data Structure - 2nd StudyData Structure - 2nd Study
Data Structure - 2nd StudyChris Ohk
 
Implementing Software Machines in Go and C
Implementing Software Machines in Go and CImplementing Software Machines in Go and C
Implementing Software Machines in Go and CEleanor McHugh
 
How to stand on the shoulders of giants
How to stand on the shoulders of giantsHow to stand on the shoulders of giants
How to stand on the shoulders of giantsIan Barber
 

What's hot (20)

Bytes in the Machine: Inside the CPython interpreter
Bytes in the Machine: Inside the CPython interpreterBytes in the Machine: Inside the CPython interpreter
Bytes in the Machine: Inside the CPython interpreter
 
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
 
Exploring slides
Exploring slidesExploring slides
Exploring slides
 
Are we ready to Go?
Are we ready to Go?Are we ready to Go?
Are we ready to Go?
 
2016 gunma.web games-and-asm.js
2016 gunma.web games-and-asm.js2016 gunma.web games-and-asm.js
2016 gunma.web games-and-asm.js
 
Diving into byte code optimization in python
Diving into byte code optimization in python Diving into byte code optimization in python
Diving into byte code optimization in python
 
Groovy
GroovyGroovy
Groovy
 
Go a crash course
Go   a crash courseGo   a crash course
Go a crash course
 
โปรแกรมย่อยและฟังชันก์มาตรฐาน
โปรแกรมย่อยและฟังชันก์มาตรฐานโปรแกรมย่อยและฟังชันก์มาตรฐาน
โปรแกรมย่อยและฟังชันก์มาตรฐาน
 
20151224-games
20151224-games20151224-games
20151224-games
 
Playing 44CON CTF for fun and profit
Playing 44CON CTF for fun and profitPlaying 44CON CTF for fun and profit
Playing 44CON CTF for fun and profit
 
C c++-meetup-1nov2017-autofdo
C c++-meetup-1nov2017-autofdoC c++-meetup-1nov2017-autofdo
C c++-meetup-1nov2017-autofdo
 
Python opcodes
Python opcodesPython opcodes
Python opcodes
 
Modern c++ Memory Management
Modern c++ Memory ManagementModern c++ Memory Management
Modern c++ Memory Management
 
TCO in Python via bytecode manipulation.
TCO in Python via bytecode manipulation.TCO in Python via bytecode manipulation.
TCO in Python via bytecode manipulation.
 
Groovy
GroovyGroovy
Groovy
 
Data Structure - 2nd Study
Data Structure - 2nd StudyData Structure - 2nd Study
Data Structure - 2nd Study
 
Implementing Software Machines in Go and C
Implementing Software Machines in Go and CImplementing Software Machines in Go and C
Implementing Software Machines in Go and C
 
How to stand on the shoulders of giants
How to stand on the shoulders of giantsHow to stand on the shoulders of giants
How to stand on the shoulders of giants
 
Let's golang
Let's golangLet's golang
Let's golang
 

Viewers also liked

Object Oriented Design(s) in R
Object Oriented Design(s) in RObject Oriented Design(s) in R
Object Oriented Design(s) in RRomain Francois
 
dplyr and torrents from cpasbien
dplyr and torrents from cpasbiendplyr and torrents from cpasbien
dplyr and torrents from cpasbienRomain Francois
 
Data manipulation with dplyr
Data manipulation with dplyrData manipulation with dplyr
Data manipulation with dplyrRomain Francois
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Romain Francois
 
Integrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBufIntegrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBufRomain Francois
 

Viewers also liked (7)

Rcpp
RcppRcpp
Rcpp
 
Object Oriented Design(s) in R
Object Oriented Design(s) in RObject Oriented Design(s) in R
Object Oriented Design(s) in R
 
R/C++
R/C++R/C++
R/C++
 
dplyr and torrents from cpasbien
dplyr and torrents from cpasbiendplyr and torrents from cpasbien
dplyr and torrents from cpasbien
 
Data manipulation with dplyr
Data manipulation with dplyrData manipulation with dplyr
Data manipulation with dplyr
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 
Integrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBufIntegrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBuf
 

Similar to dplyr

SevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittrSevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittrRomain Francois
 
Palestra sobre Collections com Python
Palestra sobre Collections com PythonPalestra sobre Collections com Python
Palestra sobre Collections com Pythonpugpe
 
Do snow.rwn
Do snow.rwnDo snow.rwn
Do snow.rwnARUN DN
 
MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709Min-hyung Kim
 
Table of Useful R commands.
Table of Useful R commands.Table of Useful R commands.
Table of Useful R commands.Dr. Volkan OBAN
 
Megadata With Python and Hadoop
Megadata With Python and HadoopMegadata With Python and Hadoop
Megadata With Python and Hadoopryancox
 
Easy HTML Tables in RStudio with Tabyl and kableExtra
Easy HTML Tables in RStudio with Tabyl and kableExtraEasy HTML Tables in RStudio with Tabyl and kableExtra
Easy HTML Tables in RStudio with Tabyl and kableExtraBarry DeCicco
 
Super Advanced Python –act1
Super Advanced Python –act1Super Advanced Python –act1
Super Advanced Python –act1Ke Wei Louis
 
Let’s Talk About Ruby
Let’s Talk About RubyLet’s Talk About Ruby
Let’s Talk About RubyIan Bishop
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisUniversity of Illinois,Chicago
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisUniversity of Illinois,Chicago
 
Ruby Language - A quick tour
Ruby Language - A quick tourRuby Language - A quick tour
Ruby Language - A quick touraztack
 
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...InfluxData
 

Similar to dplyr (20)

SevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittrSevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittr
 
R programming language
R programming languageR programming language
R programming language
 
dplyr use case
dplyr use casedplyr use case
dplyr use case
 
Bash tricks
Bash tricksBash tricks
Bash tricks
 
Palestra sobre Collections com Python
Palestra sobre Collections com PythonPalestra sobre Collections com Python
Palestra sobre Collections com Python
 
Python 1
Python 1Python 1
Python 1
 
Do snow.rwn
Do snow.rwnDo snow.rwn
Do snow.rwn
 
MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709MH prediction modeling and validation in r (1) regression 190709
MH prediction modeling and validation in r (1) regression 190709
 
Table of Useful R commands.
Table of Useful R commands.Table of Useful R commands.
Table of Useful R commands.
 
Megadata With Python and Hadoop
Megadata With Python and HadoopMegadata With Python and Hadoop
Megadata With Python and Hadoop
 
Easy HTML Tables in RStudio with Tabyl and kableExtra
Easy HTML Tables in RStudio with Tabyl and kableExtraEasy HTML Tables in RStudio with Tabyl and kableExtra
Easy HTML Tables in RStudio with Tabyl and kableExtra
 
Super Advanced Python –act1
Super Advanced Python –act1Super Advanced Python –act1
Super Advanced Python –act1
 
dplyr-tutorial.pdf
dplyr-tutorial.pdfdplyr-tutorial.pdf
dplyr-tutorial.pdf
 
Let’s Talk About Ruby
Let’s Talk About RubyLet’s Talk About Ruby
Let’s Talk About Ruby
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency Analysis
 
Pumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency AnalysisPumps, Compressors and Turbine Fault Frequency Analysis
Pumps, Compressors and Turbine Fault Frequency Analysis
 
Ruby Language - A quick tour
Ruby Language - A quick tourRuby Language - A quick tour
Ruby Language - A quick tour
 
R code for data manipulation
R code for data manipulationR code for data manipulation
R code for data manipulation
 
R code for data manipulation
R code for data manipulationR code for data manipulation
R code for data manipulation
 
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...
Samantha Wang [InfluxData] | Best Practices on How to Transform Your Data Usi...
 

More from Romain Francois (9)

dplyr
dplyrdplyr
dplyr
 
Rcpp11
Rcpp11Rcpp11
Rcpp11
 
R and C++
R and C++R and C++
R and C++
 
R and cpp
R and cppR and cpp
R and cpp
 
Rcpp attributes
Rcpp attributesRcpp attributes
Rcpp attributes
 
Rcpp is-ready
Rcpp is-readyRcpp is-ready
Rcpp is-ready
 
RProtoBuf: protocol buffers for R
RProtoBuf: protocol buffers for RRProtoBuf: protocol buffers for R
RProtoBuf: protocol buffers for R
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 

Recently uploaded

Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfAddepto
 
Dev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebDev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebUiPathCommunity
 
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Manik S Magar
 
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024BookNet Canada
 
Moving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfMoving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfLoriGlavin3
 
The Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsThe Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsPixlogix Infotech
 
How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.Curtis Poe
 
From Family Reminiscence to Scholarly Archive .
From Family Reminiscence to Scholarly Archive .From Family Reminiscence to Scholarly Archive .
From Family Reminiscence to Scholarly Archive .Alan Dix
 
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptxUse of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptxLoriGlavin3
 
What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024Stephanie Beckett
 
TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024Lonnie McRorey
 
WordPress Websites for Engineers: Elevate Your Brand
WordPress Websites for Engineers: Elevate Your BrandWordPress Websites for Engineers: Elevate Your Brand
WordPress Websites for Engineers: Elevate Your Brandgvaughan
 
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxThe Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxLoriGlavin3
 
SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024Lorenzo Miniero
 
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptxMerck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptxLoriGlavin3
 
Time Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsTime Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsNathaniel Shimoni
 
"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii Soldatenko"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii SoldatenkoFwdays
 
Generative AI for Technical Writer or Information Developers
Generative AI for Technical Writer or Information DevelopersGenerative AI for Technical Writer or Information Developers
Generative AI for Technical Writer or Information DevelopersRaghuram Pandurangan
 
SALESFORCE EDUCATION CLOUD | FEXLE SERVICES
SALESFORCE EDUCATION CLOUD | FEXLE SERVICESSALESFORCE EDUCATION CLOUD | FEXLE SERVICES
SALESFORCE EDUCATION CLOUD | FEXLE SERVICESmohitsingh558521
 
Take control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteTake control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteDianaGray10
 

Recently uploaded (20)

Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdf
 
Dev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebDev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio Web
 
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!
 
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
 
Moving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfMoving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdf
 
The Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsThe Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and Cons
 
How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.
 
From Family Reminiscence to Scholarly Archive .
From Family Reminiscence to Scholarly Archive .From Family Reminiscence to Scholarly Archive .
From Family Reminiscence to Scholarly Archive .
 
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptxUse of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
 
What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024
 
TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024
 
WordPress Websites for Engineers: Elevate Your Brand
WordPress Websites for Engineers: Elevate Your BrandWordPress Websites for Engineers: Elevate Your Brand
WordPress Websites for Engineers: Elevate Your Brand
 
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxThe Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
 
SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024SIP trunking in Janus @ Kamailio World 2024
SIP trunking in Janus @ Kamailio World 2024
 
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptxMerck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptx
 
Time Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsTime Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directions
 
"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii Soldatenko"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii Soldatenko
 
Generative AI for Technical Writer or Information Developers
Generative AI for Technical Writer or Information DevelopersGenerative AI for Technical Writer or Information Developers
Generative AI for Technical Writer or Information Developers
 
SALESFORCE EDUCATION CLOUD | FEXLE SERVICES
SALESFORCE EDUCATION CLOUD | FEXLE SERVICESSALESFORCE EDUCATION CLOUD | FEXLE SERVICES
SALESFORCE EDUCATION CLOUD | FEXLE SERVICES
 
Take control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteTake control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test Suite
 

dplyr

  • 2. • Use R since 2002 • R Enthusiast • R/C++ hero • Performance • dplyr • Occasional comedy
  • 4. dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr #rcatladies dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr dplyr
  • 5. verb function that takes a data frame as its first argument
  • 6. Examples of R verbs head, tail, … verb subject … > head( iris, n = 4 ) Sepal.Length Sepal.Width Petal.Length Petal.Width Species 1 5.1 3.5 1.4 0.2 setosa 2 4.9 3.0 1.4 0.2 setosa 3 4.7 3.2 1.3 0.2 setosa 4 4.6 3.1 1.5 0.2 setosa
  • 8. enjoy(cool(bake(shape(beat(append(bowl(rep("flour", 2), "yeast", "water", "milk", "oil"), "flour", until = "soft"), duration = "3mins"), as = "balls", style = "slightly-flat"), degrees = 200, duration = "15mins"), duration = "5mins")) bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>% append("flour", until = "soft") %>% beat(duration = "3mins") %>% shape(as = "balls", style = "slightly-flat") %>% bake(degrees = 200, duration = "15mins") %>% cool(buns, duration = "5mins") %>% enjoy()
  • 9. enjoy(cool(bake(shape(beat(append(bowl(rep("flour", 2), "yeast", "water", "milk", "oil"), "flour", until = "soft"), duration = "3mins"), as = "balls", style = "slightly-flat"), degrees = 200, duration = "15mins"), duration = "5mins")) bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>% append("flour", until = "soft") %>% beat(duration = "3mins") %>% shape(as = "balls", style = "slightly-flat") %>% bake(degrees = 200, duration = "15mins") %>% cool(buns, duration = "5mins") %>% enjoy()
  • 10. nycflights13: Data about flights departing NYC in 2013
  • 11. > library("nycflights13") > flights Source: local data frame [336,776 x 16] year month day dep_time dep_delay arr_time arr_delay carrier tailnum flight 1 2013 1 1 517 2 830 11 UA N14228 1545 2 2013 1 1 533 4 850 20 UA N24211 1714 3 2013 1 1 542 2 923 33 AA N619AA 1141 4 2013 1 1 544 -1 1004 -18 B6 N804JB 725 5 2013 1 1 554 -6 812 -25 DL N668DN 461 6 2013 1 1 554 -4 740 12 UA N39463 1696 7 2013 1 1 555 -5 913 19 B6 N516JB 507 8 2013 1 1 557 -3 709 -14 EV N829AS 5708 9 2013 1 1 557 -3 838 -8 B6 N593JB 79 10 2013 1 1 558 -2 753 8 AA N3ALAA 301 .. ... ... ... ... ... ... ... ... ... ... Variables not shown: origin (chr), dest (chr), air_time (dbl), distance (dbl), hour (dbl), minute (dbl)
  • 12. tbl_df A data frame that does print all of itself by default > data <- tbl_df(mtcars) > data Source: local data frame [32 x 11] mpg cyl disp hp drat wt qsec vs am gear carb 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 .. ... ... ... ... ... ... ... .. .. ... ...
  • 13. filter A subset of the rows of the data frame
  • 14. filter(flights, month == 1, day == 1) flights %>% filter( dep_delay < 10 ) flights %>% filter( arr_delay < dep_delay ) flights %>% filter( hour < 12, arr_delay <= 0 )
  • 15. arrange reorder a data frame
  • 16. flights %>% filter( hour < 8 ) %>% arrange( year, month, day ) flights %>% arrange( desc(dep_delay) )
  • 17. select select certain columns from the data frame
  • 18. # Select columns by name select(flights, year, month, day) # Select all columns between year and day select(flights, year:day) # Select all columns except those from year to # day (inclusive) select(flights, -(year:day))
  • 19. mutate modify or create columns based on others
  • 20. d <- flights %>% mutate( gain = arr_delay - dep_delay, speed = distance / air_time * 60 ) %>% filter( gain > 0 ) %>% arrange( desc(speed) ) d %>% select( year, month, day, dest, gain, speed )
  • 21. summarise collapse a data frame into one row …
  • 22. summarise(flights, delay = mean(dep_delay, na.rm = TRUE)) flights %>% filter( dep_delay > 0 ) %>% summarise(arr_delay = mean(arr_delay, na.rm = TRUE))
  • 23. group_by Group observations by one or more variables
  • 24. flights %>% group_by( tailnum ) %>% summarise( count = n(), dist = mean(distance, na.rm = TRUE), delay = mean(arr_delay, na.rm = TRUE) ) %>% filter( is.finite(delay) ) %>% arrange( desc(count) )
  • 25. flights %>% group_by(dest) %>% summarise( planes = n_distinct(tailnum), flights = n() ) %>% arrange( desc(flights) )
  • 26. joins joining two data frames
  • 27. inner_join all rows from x where there are matching values in y, and all columns from x and y. If there are multiple matches between x and y, all combination of the matches are returned. destinations <- flights %>% group_by(dest) %>% summarise( planes = n_distinct(tailnum), flights = n() ) %>% arrange( desc(flights) ) %>% rename( faa = dest ) inner_join( destinations, airports, by = "faa")
  • 28. inner_join all rows from x where there are matching values in y, and all columns from x and y. If there are multiple matches between x and y, all combination of the matches are returned. destinations <- flights %>% group_by(dest) %>% summarise( planes = n_distinct(tailnum), flights = n() ) %>% arrange( desc(flights) ) inner_join( destinations, airports, by = c( "dest" = "faa" ) )
  • 29. other joins See ?join • left_join, right_join • inner_join, outer_join • semi_join • anti_join
  • 30. dplyr %>% summary • Simple verbs: filter, mutate, select, summarise, arrange • Grouping with group_by • Joins with *_join • Convenient with %>% • F✈️ST
  • 31. dplyr Romain François @romain_francois romain@r-enthusiasts.com