From 249bed289e295f5c59bc0f2b6327958f14e10406 Mon Sep 17 00:00:00 2001
From: Quarto GHA Workflow Runner <quarto-github-actions-publish@example.com>
Date: Wed, 6 Sep 2023 11:10:58 +0000
Subject: [PATCH] Built site for gh-pages

---
 .nojekyll                                     |   2 +-
 materials/2_data_manipulation_1.html          | 267 +++++++-----
 .../4_data_manipulation_2-exercises.html      | 161 ++++---
 materials/4_data_manipulation_2.html          | 394 ++++++++++++------
 materials/images/segfault.png                 | Bin 0 -> 16866 bytes
 search.json                                   | 174 ++++++--
 sitemap.xml                                   |  32 +-
 7 files changed, 696 insertions(+), 334 deletions(-)
 create mode 100644 materials/images/segfault.png
diff --git a/.nojekyll b/.nojekyll
index 5596367..f2c122c 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-4fa3a878
\ No newline at end of file
+0204bc42
\ No newline at end of file
diff --git a/materials/2_data_manipulation_1.html b/materials/2_data_manipulation_1.html
index 5f64fde..6de7025 100644
--- a/materials/2_data_manipulation_1.html
+++ b/materials/2_data_manipulation_1.html
@@ -394,6 +394,11 @@
 <h1>Data Manipulation—Part 1</h1>
 
 </section>
+<section id="goals" class="slide level2">
+<h2>Goals</h2>
+<p>Avoiding these! But…don’t worry!</p>
+
+<img data-src="images/segfault.png" class="r-stretch" alt="Example of a segfault crash"></section>
 <section id="arrow-datasets" class="slide level2">
 <h2>Arrow Datasets</h2>
 
@@ -524,6 +529,11 @@ <h2>calling <code>nrow()</code> to see how much data</h2>
 <span id="cb11-2"><a href="#cb11-2"></a>  <span class="fu">filter</span>(year <span class="sc">%in%</span> <span class="dv">2017</span><span class="sc">:</span><span class="dv">2021</span>) <span class="sc">|&gt;</span></span>
 <span id="cb11-3"><a href="#cb11-3"></a>  <span class="fu">nrow</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
+<pre><code>Called from: dim.arrow_dplyr_query(x)
+debug: rows &lt;- Scanner$create(x)$CountRows()
+debug: c(rows, cols)</code></pre>
+</div>
+<div class="cell-output cell-output-stdout">
 <pre><code>[1] 356236190</code></pre>
 </div>
 </div>
@@ -531,15 +541,15 @@ <h2>calling <code>nrow()</code> to see how much data</h2>
 <section id="calling-nrow-doesnt-work-with-intermediate-step" class="slide level2">
 <h2>calling <code>nrow()</code> doesn’t work with intermediate step</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb13-2"><a href="#cb13-2"></a>  <span class="fu">filter</span>(year <span class="sc">%in%</span> <span class="dv">2017</span><span class="sc">:</span><span class="dv">2021</span>) <span class="sc">|&gt;</span></span>
-<span id="cb13-3"><a href="#cb13-3"></a>  <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
-<span id="cb13-4"><a href="#cb13-4"></a>  <span class="fu">summarize</span>(</span>
-<span id="cb13-5"><a href="#cb13-5"></a>    <span class="at">all_trips =</span> <span class="fu">n</span>(),</span>
-<span id="cb13-6"><a href="#cb13-6"></a>    <span class="at">shared_trips =</span> <span class="fu">sum</span>(passenger_count <span class="sc">&gt;</span> <span class="dv">1</span>, <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span>
-<span id="cb13-7"><a href="#cb13-7"></a>  ) <span class="sc">|&gt;</span></span>
-<span id="cb13-8"><a href="#cb13-8"></a>  <span class="fu">mutate</span>(<span class="at">pct_shared =</span> shared_trips <span class="sc">/</span> all_trips <span class="sc">*</span> <span class="dv">100</span>) <span class="sc">|&gt;</span></span>
-<span id="cb13-9"><a href="#cb13-9"></a>  <span class="fu">nrow</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb14-2"><a href="#cb14-2"></a>  <span class="fu">filter</span>(year <span class="sc">%in%</span> <span class="dv">2017</span><span class="sc">:</span><span class="dv">2021</span>) <span class="sc">|&gt;</span></span>
+<span id="cb14-3"><a href="#cb14-3"></a>  <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
+<span id="cb14-4"><a href="#cb14-4"></a>  <span class="fu">summarize</span>(</span>
+<span id="cb14-5"><a href="#cb14-5"></a>    <span class="at">all_trips =</span> <span class="fu">n</span>(),</span>
+<span id="cb14-6"><a href="#cb14-6"></a>    <span class="at">shared_trips =</span> <span class="fu">sum</span>(passenger_count <span class="sc">&gt;</span> <span class="dv">1</span>, <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span>
+<span id="cb14-7"><a href="#cb14-7"></a>  ) <span class="sc">|&gt;</span></span>
+<span id="cb14-8"><a href="#cb14-8"></a>  <span class="fu">mutate</span>(<span class="at">pct_shared =</span> shared_trips <span class="sc">/</span> all_trips <span class="sc">*</span> <span class="dv">100</span>) <span class="sc">|&gt;</span></span>
+<span id="cb14-9"><a href="#cb14-9"></a>  <span class="fu">nrow</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code>[1] NA</code></pre>
 </div>
@@ -548,16 +558,16 @@ <h2>calling <code>nrow()</code> doesn’t work with intermediate step</h2>
 <section id="use-compute-to-execute-intermediate-steps" class="slide level2">
 <h2>use <code>compute()</code> to execute intermediate steps</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb15" data-code-line-numbers="9"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb15-2"><a href="#cb15-2"></a>  <span class="fu">filter</span>(year <span class="sc">%in%</span> <span class="dv">2017</span><span class="sc">:</span><span class="dv">2021</span>) <span class="sc">|&gt;</span></span>
-<span id="cb15-3"><a href="#cb15-3"></a>  <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
-<span id="cb15-4"><a href="#cb15-4"></a>  <span class="fu">summarize</span>(</span>
-<span id="cb15-5"><a href="#cb15-5"></a>    <span class="at">all_trips =</span> <span class="fu">n</span>(),</span>
-<span id="cb15-6"><a href="#cb15-6"></a>    <span class="at">shared_trips =</span> <span class="fu">sum</span>(passenger_count <span class="sc">&gt;</span> <span class="dv">1</span>, <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span>
-<span id="cb15-7"><a href="#cb15-7"></a>  ) <span class="sc">|&gt;</span></span>
-<span id="cb15-8"><a href="#cb15-8"></a>  <span class="fu">mutate</span>(<span class="at">pct_shared =</span> shared_trips <span class="sc">/</span> all_trips <span class="sc">*</span> <span class="dv">100</span>) <span class="sc">|&gt;</span></span>
-<span id="cb15-9"><a href="#cb15-9"></a>  <span class="fu">compute</span>() <span class="sc">|&gt;</span></span>
-<span id="cb15-10"><a href="#cb15-10"></a>  <span class="fu">nrow</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb16" data-code-line-numbers="9"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb16-2"><a href="#cb16-2"></a>  <span class="fu">filter</span>(year <span class="sc">%in%</span> <span class="dv">2017</span><span class="sc">:</span><span class="dv">2021</span>) <span class="sc">|&gt;</span></span>
+<span id="cb16-3"><a href="#cb16-3"></a>  <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
+<span id="cb16-4"><a href="#cb16-4"></a>  <span class="fu">summarize</span>(</span>
+<span id="cb16-5"><a href="#cb16-5"></a>    <span class="at">all_trips =</span> <span class="fu">n</span>(),</span>
+<span id="cb16-6"><a href="#cb16-6"></a>    <span class="at">shared_trips =</span> <span class="fu">sum</span>(passenger_count <span class="sc">&gt;</span> <span class="dv">1</span>, <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span>
+<span id="cb16-7"><a href="#cb16-7"></a>  ) <span class="sc">|&gt;</span></span>
+<span id="cb16-8"><a href="#cb16-8"></a>  <span class="fu">mutate</span>(<span class="at">pct_shared =</span> shared_trips <span class="sc">/</span> all_trips <span class="sc">*</span> <span class="dv">100</span>) <span class="sc">|&gt;</span></span>
+<span id="cb16-9"><a href="#cb16-9"></a>  <span class="fu">compute</span>() <span class="sc">|&gt;</span></span>
+<span id="cb16-10"><a href="#cb16-10"></a>  <span class="fu">nrow</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code>[1] 5</code></pre>
 </div>
@@ -575,48 +585,49 @@ <h2>Your Turn</h2>
 <h2>use <code>head()</code> then <code>collect()</code> to preview output for large queries</h2>
 <p>How much were fares in GBP (£)?</p>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1"></a>fares_pounds <span class="ot">&lt;-</span> nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb17-2"><a href="#cb17-2"></a>  <span class="fu">filter</span>(year <span class="sc">%in%</span> <span class="dv">2012</span><span class="sc">:</span><span class="dv">2015</span>) <span class="sc">|&gt;</span></span>
-<span id="cb17-3"><a href="#cb17-3"></a>  <span class="fu">mutate</span>(</span>
-<span id="cb17-4"><a href="#cb17-4"></a>    <span class="at">fare_amount_pounds =</span> fare_amount <span class="sc">*</span> <span class="fl">0.79</span></span>
-<span id="cb17-5"><a href="#cb17-5"></a>  ) <span class="sc">|&gt;</span></span>
-<span id="cb17-6"><a href="#cb17-6"></a>  <span class="fu">select</span>(fare_amount, fare_amount_pounds)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1"></a>fares_pounds <span class="ot">&lt;-</span> nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb18-2"><a href="#cb18-2"></a>  <span class="fu">mutate</span>(</span>
+<span id="cb18-3"><a href="#cb18-3"></a>    <span class="at">fare_amount_pounds =</span> fare_amount <span class="sc">*</span> <span class="fl">0.79</span></span>
+<span id="cb18-4"><a href="#cb18-4"></a>  )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>How many rows?</p>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1"></a>fares_pounds <span class="sc">|&gt;</span></span>
-<span id="cb18-2"><a href="#cb18-2"></a>  <span class="fu">nrow</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb19"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1"></a>fares_pounds <span class="sc">|&gt;</span></span>
+<span id="cb19-2"><a href="#cb19-2"></a>  <span class="fu">nrow</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
-<pre><code>[1] 662951433</code></pre>
+<pre><code>[1] 1150352666</code></pre>
 </div>
 </div>
 </section>
-<section id="use-head-then-collect-to-preview-output" class="slide level2">
-<h2>use <code>head()</code> then <code>collect()</code> to preview output</h2>
+<section id="use-head-select-filter-and-collect-to-preview-results" class="slide level2">
+<h2>Use <code>head()</code>, <code>select()</code>, <code>filter()</code>, and <code>collect()</code> to preview results</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb20"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1"></a>fares_pounds <span class="sc">|&gt;</span></span>
-<span id="cb20-2"><a href="#cb20-2"></a>  <span class="fu">head</span>() <span class="sc">|&gt;</span></span>
-<span id="cb20-3"><a href="#cb20-3"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb21-2"><a href="#cb21-2"></a>  <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2020</span>) <span class="sc">|&gt;</span></span>
+<span id="cb21-3"><a href="#cb21-3"></a>  <span class="fu">mutate</span>(<span class="at">fare_pounds =</span> fare_amount <span class="sc">*</span> <span class="fl">0.79</span>) <span class="sc">|&gt;</span></span>
+<span id="cb21-4"><a href="#cb21-4"></a>  <span class="fu">select</span>(fare_amount, fare_pounds) <span class="sc">|&gt;</span></span>
+<span id="cb21-5"><a href="#cb21-5"></a>  <span class="fu">head</span>() <span class="sc">|&gt;</span></span>
+<span id="cb21-6"><a href="#cb21-6"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 6 × 2
-  fare_amount fare_amount_pounds
-        &lt;dbl&gt;              &lt;dbl&gt;
-1        29.7              23.5 
-2         9.3               7.35
-3         4.1               3.24
-4         4.5               3.56
-5         4.5               3.56
-6         4.1               3.24</code></pre>
+  fare_amount fare_pounds
+        &lt;dbl&gt;       &lt;dbl&gt;
+1         8          6.32
+2        17         13.4 
+3         6.5        5.14
+4         7          5.53
+5         6.5        5.14
+6        42         33.2 </code></pre>
 </div>
 </div>
 </section>
 <section id="use-across-to-transform-data-in-multiple-columns" class="slide level2">
 <h2>use <code>across()</code> to transform data in multiple columns</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb22"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1"></a>taxis_gbp <span class="ot">&lt;-</span> nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb22-2"><a href="#cb22-2"></a>  <span class="fu">mutate</span>(<span class="fu">across</span>(<span class="fu">ends_with</span>(<span class="st">"amount"</span>), <span class="fu">list</span>(<span class="at">pounds =</span> <span class="sc">~</span>.x <span class="sc">*</span> <span class="fl">0.79</span>)))</span>
-<span id="cb22-3"><a href="#cb22-3"></a></span>
-<span id="cb22-4"><a href="#cb22-4"></a>taxis_gbp</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb23"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1"></a>taxis_gbp <span class="ot">&lt;-</span> nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb23-2"><a href="#cb23-2"></a>  <span class="fu">mutate</span>(<span class="fu">across</span>(<span class="fu">ends_with</span>(<span class="st">"amount"</span>), <span class="fu">list</span>(<span class="at">pounds =</span> <span class="sc">~</span>.x <span class="sc">*</span> <span class="fl">0.79</span>)))</span>
+<span id="cb23-3"><a href="#cb23-3"></a></span>
+<span id="cb23-4"><a href="#cb23-4"></a>taxis_gbp</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code>FileSystemDataset (query)
 vendor_name: string
@@ -655,10 +666,10 @@ <h2>use <code>across()</code> to transform data in multiple columns</h2>
 <section id="use-across-to-transform-data-in-multiple-columns-1" class="slide level2">
 <h2>use <code>across()</code> to transform data in multiple columns</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb24"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1"></a>taxis_gbp <span class="sc">|&gt;</span></span>
-<span id="cb24-2"><a href="#cb24-2"></a>  <span class="fu">select</span>(<span class="fu">contains</span>(<span class="st">"amount"</span>)) <span class="sc">|&gt;</span></span>
-<span id="cb24-3"><a href="#cb24-3"></a>  <span class="fu">head</span>() <span class="sc">|&gt;</span></span>
-<span id="cb24-4"><a href="#cb24-4"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb25"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1"></a>taxis_gbp <span class="sc">|&gt;</span></span>
+<span id="cb25-2"><a href="#cb25-2"></a>  <span class="fu">select</span>(<span class="fu">contains</span>(<span class="st">"amount"</span>)) <span class="sc">|&gt;</span></span>
+<span id="cb25-3"><a href="#cb25-3"></a>  <span class="fu">head</span>() <span class="sc">|&gt;</span></span>
+<span id="cb25-4"><a href="#cb25-4"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 6 × 8
   fare_amount tip_amount tolls_amount total_amount fare_amount_pounds
@@ -673,6 +684,16 @@ <h2>use <code>across()</code> to transform data in multiple columns</h2>
 #   total_amount_pounds &lt;dbl&gt;</code></pre>
 </div>
 </div>
+</section>
+<section id="summary" class="slide level2">
+<h2>Summary</h2>
+<ul>
+<li>Use <code>nrow()</code> to work out how many rows of data your analyses will return</li>
+<li>Use <code>compute()</code> when you need to execute intermediate steps</li>
+<li>Use <code>collect()</code> to pull all of the data into your R session</li>
+<li>Use <code>head()</code>, <code>select()</code>, <code>filter()</code>, and <code>collect()</code> to preview results</li>
+<li>Use <code>across()</code> to manipulate data in multiple columns at once</li>
+</ul>
 </section></section>
 <section>
 <section id="dplyr-api-in-arrow---what-is-and-isnt-implemented" class="title-slide slide level1 center">
@@ -683,12 +704,12 @@ <h1>dplyr API in arrow - what is and isn’t implemented?</h1>
 <h2>example - <code>slice()</code></h2>
 <p>First three trips in the dataset in 2021 where distance &gt; 100 miles</p>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb26"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1"></a>long_rides_2021 <span class="ot">&lt;-</span> nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb26-2"><a href="#cb26-2"></a>  <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2021</span> <span class="sc">&amp;</span> trip_distance <span class="sc">&gt;</span> <span class="dv">100</span>) <span class="sc">|&gt;</span></span>
-<span id="cb26-3"><a href="#cb26-3"></a>  <span class="fu">select</span>(pickup_datetime, year, trip_distance)</span>
-<span id="cb26-4"><a href="#cb26-4"></a></span>
-<span id="cb26-5"><a href="#cb26-5"></a>long_rides_2021 <span class="sc">|&gt;</span></span>
-<span id="cb26-6"><a href="#cb26-6"></a>  <span class="fu">slice</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb27"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1"></a>long_rides_2021 <span class="ot">&lt;-</span> nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb27-2"><a href="#cb27-2"></a>  <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2021</span> <span class="sc">&amp;</span> trip_distance <span class="sc">&gt;</span> <span class="dv">100</span>) <span class="sc">|&gt;</span></span>
+<span id="cb27-3"><a href="#cb27-3"></a>  <span class="fu">select</span>(pickup_datetime, year, trip_distance)</span>
+<span id="cb27-4"><a href="#cb27-4"></a></span>
+<span id="cb27-5"><a href="#cb27-5"></a>long_rides_2021 <span class="sc">|&gt;</span></span>
+<span id="cb27-6"><a href="#cb27-6"></a>  <span class="fu">slice</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-error">
 <pre><code>Error in UseMethod("slice"): no applicable method for 'slice' applied to an object of class "arrow_dplyr_query"</code></pre>
 </div>
@@ -697,16 +718,16 @@ <h2>example - <code>slice()</code></h2>
 <section id="head-to-the-docs" class="slide level2">
 <h2>head to the docs!</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb28"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1"></a>?<span class="st">`</span><span class="at">arrow-dplyr</span><span class="st">`</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb29"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1"></a>?<span class="st">`</span><span class="at">arrow-dplyr</span><span class="st">`</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
-<p>or view them at <a href="https://arrow.apache.org/docs/r/reference/acero.html">https://arrow.apache.org/docs/r/reference/acero.html</a></p>
+<p>or view them at <a href="https://arrow.apache.org/docs/r/reference/acero.html" class="uri">https://arrow.apache.org/docs/r/reference/acero.html</a></p>
 </section>
 <section id="a-different-function" class="slide level2">
 <h2>A different function</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb29"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1"></a>long_rides_2021 <span class="sc">|&gt;</span></span>
-<span id="cb29-2"><a href="#cb29-2"></a>  <span class="fu">slice_max</span>(<span class="at">n =</span> <span class="dv">3</span>, <span class="at">order_by =</span> trip_distance, <span class="at">with_ties =</span> <span class="cn">FALSE</span>) <span class="sc">|&gt;</span></span>
-<span id="cb29-3"><a href="#cb29-3"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb30"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1"></a>long_rides_2021 <span class="sc">|&gt;</span></span>
+<span id="cb30-2"><a href="#cb30-2"></a>  <span class="fu">slice_max</span>(<span class="at">n =</span> <span class="dv">3</span>, <span class="at">order_by =</span> trip_distance, <span class="at">with_ties =</span> <span class="cn">FALSE</span>) <span class="sc">|&gt;</span></span>
+<span id="cb30-3"><a href="#cb30-3"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 3 × 3
   pickup_datetime      year trip_distance
@@ -720,29 +741,29 @@ <h2>A different function</h2>
 <section id="or-call-collect-first" class="slide level2">
 <h2>Or call <code>collect()</code> first</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb31"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1"></a>long_rides_2021 <span class="sc">|&gt;</span></span>
-<span id="cb31-2"><a href="#cb31-2"></a>  <span class="fu">collect</span>() <span class="sc">|&gt;</span></span>
-<span id="cb31-3"><a href="#cb31-3"></a>  <span class="fu">slice</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb32"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1"></a>long_rides_2021 <span class="sc">|&gt;</span></span>
+<span id="cb32-2"><a href="#cb32-2"></a>  <span class="fu">collect</span>() <span class="sc">|&gt;</span></span>
+<span id="cb32-3"><a href="#cb32-3"></a>  <span class="fu">slice</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 3 × 3
   pickup_datetime      year trip_distance
   &lt;dttm&gt;              &lt;int&gt;         &lt;dbl&gt;
-1 2021-01-03 09:01:26  2021          216.
-2 2021-01-03 11:36:52  2021          268.
-3 2021-10-02 15:04:53  2021          188.</code></pre>
+1 2021-01-06 07:27:55  2021          271.
+2 2021-01-03 09:01:26  2021          216.
+3 2021-01-03 11:36:52  2021          268.</code></pre>
 </div>
 </div>
 </section>
 <section id="tidyr-functions---pivot" class="slide level2">
 <h2>tidyr functions - pivot</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb33"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1"></a><span class="fu">library</span>(tidyr)</span>
-<span id="cb33-2"><a href="#cb33-2"></a></span>
-<span id="cb33-3"><a href="#cb33-3"></a>nyc_taxi <span class="sc">|&gt;</span> </span>
-<span id="cb33-4"><a href="#cb33-4"></a>  <span class="fu">group_by</span>(vendor_name) <span class="sc">|&gt;</span></span>
-<span id="cb33-5"><a href="#cb33-5"></a>  <span class="fu">summarise</span>(<span class="at">max_fare =</span> <span class="fu">max</span>(fare_amount), <span class="at">min_fare =</span> <span class="fu">min</span>(fare_amount)) <span class="sc">|&gt;</span></span>
-<span id="cb33-6"><a href="#cb33-6"></a>  <span class="fu">pivot_longer</span>(<span class="sc">!</span>vendor_name, <span class="at">names_to =</span> <span class="st">"metric"</span>) <span class="sc">|&gt;</span> </span>
-<span id="cb33-7"><a href="#cb33-7"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb34"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb34-1"><a href="#cb34-1"></a><span class="fu">library</span>(tidyr)</span>
+<span id="cb34-2"><a href="#cb34-2"></a></span>
+<span id="cb34-3"><a href="#cb34-3"></a>nyc_taxi <span class="sc">|&gt;</span> </span>
+<span id="cb34-4"><a href="#cb34-4"></a>  <span class="fu">group_by</span>(vendor_name) <span class="sc">|&gt;</span></span>
+<span id="cb34-5"><a href="#cb34-5"></a>  <span class="fu">summarise</span>(<span class="at">max_fare =</span> <span class="fu">max</span>(fare_amount)) <span class="sc">|&gt;</span></span>
+<span id="cb34-6"><a href="#cb34-6"></a>  <span class="fu">pivot_longer</span>(<span class="sc">!</span>vendor_name, <span class="at">names_to =</span> <span class="st">"metric"</span>) <span class="sc">|&gt;</span> </span>
+<span id="cb34-7"><a href="#cb34-7"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-error">
 <pre><code>Error in UseMethod("pivot_longer"): no applicable method for 'pivot_longer' applied to an object of class "arrow_dplyr_query"</code></pre>
 </div>
@@ -755,25 +776,22 @@ <h2>duckdb</h2>
 <section id="tidyr-functions---pivot-with-duckdb" class="slide level2">
 <h2>tidyr functions - pivot with duckdb!</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb35"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb35-1"><a href="#cb35-1"></a><span class="fu">library</span>(duckdb)</span>
-<span id="cb35-2"><a href="#cb35-2"></a></span>
-<span id="cb35-3"><a href="#cb35-3"></a>nyc_taxi <span class="sc">|&gt;</span> </span>
-<span id="cb35-4"><a href="#cb35-4"></a>  <span class="fu">group_by</span>(vendor_name) <span class="sc">|&gt;</span></span>
-<span id="cb35-5"><a href="#cb35-5"></a>  <span class="fu">summarise</span>(<span class="at">max_fare =</span> <span class="fu">max</span>(fare_amount), <span class="at">min_fare =</span> <span class="fu">min</span>(fare_amount)) <span class="sc">|&gt;</span></span>
-<span id="cb35-6"><a href="#cb35-6"></a>  <span class="fu">to_duckdb</span>() <span class="sc">|&gt;</span> <span class="co"># send data to duckdb</span></span>
-<span id="cb35-7"><a href="#cb35-7"></a>  <span class="fu">pivot_longer</span>(<span class="sc">!</span>vendor_name, <span class="at">names_to =</span> <span class="st">"metric"</span>) <span class="sc">|&gt;</span> </span>
-<span id="cb35-8"><a href="#cb35-8"></a>  <span class="fu">to_arrow</span>() <span class="sc">|&gt;</span> <span class="co"># return data back to arrow</span></span>
-<span id="cb35-9"><a href="#cb35-9"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb36"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1"></a><span class="fu">library</span>(duckdb)</span>
+<span id="cb36-2"><a href="#cb36-2"></a></span>
+<span id="cb36-3"><a href="#cb36-3"></a>nyc_taxi <span class="sc">|&gt;</span> </span>
+<span id="cb36-4"><a href="#cb36-4"></a>  <span class="fu">group_by</span>(vendor_name) <span class="sc">|&gt;</span></span>
+<span id="cb36-5"><a href="#cb36-5"></a>  <span class="fu">summarise</span>(<span class="at">max_fare =</span> <span class="fu">max</span>(fare_amount)) <span class="sc">|&gt;</span></span>
+<span id="cb36-6"><a href="#cb36-6"></a>  <span class="fu">to_duckdb</span>() <span class="sc">|&gt;</span> <span class="co"># send data to duckdb</span></span>
+<span id="cb36-7"><a href="#cb36-7"></a>  <span class="fu">pivot_longer</span>(<span class="sc">!</span>vendor_name, <span class="at">names_to =</span> <span class="st">"metric"</span>) <span class="sc">|&gt;</span> </span>
+<span id="cb36-8"><a href="#cb36-8"></a>  <span class="fu">to_arrow</span>() <span class="sc">|&gt;</span> <span class="co"># return data back to arrow</span></span>
+<span id="cb36-9"><a href="#cb36-9"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
-<pre><code># A tibble: 6 × 3
+<pre><code># A tibble: 3 × 3
   vendor_name metric     value
   &lt;chr&gt;       &lt;chr&gt;      &lt;dbl&gt;
 1 CMT         max_fare 998310.
 2 VTS         max_fare  10000.
-3 &lt;NA&gt;        max_fare   3555.
-4 CMT         min_fare   -652.
-5 VTS         min_fare  -1856 
-6 &lt;NA&gt;        min_fare   -150.</code></pre>
+3 &lt;NA&gt;        max_fare   3555.</code></pre>
 </div>
 </div>
 <div class="callout callout-caution callout-titled callout-style-default">
@@ -785,7 +803,7 @@ <h2>tidyr functions - pivot with duckdb!</h2>
 <p><strong>Requires arrow 13.0.0</strong></p>
 </div>
 <div class="callout-content">
-<p>This code requires arrow 13.0.0 or above to run, due to a bug which was fixed in this version</p>
+<p>This code requires arrow 13.0.0 or above to run, because it relies on a bug fix that was first included in that version.</p>
 </div>
 </div>
 </div>
@@ -805,10 +823,10 @@ <h2>Using functions inside verbs</h2>
 <section id="morning-vs-afternoon-with-namespacing" class="slide level2">
 <h2>Morning vs afternoon with namespacing</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb37" data-code-line-numbers="2"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb37-1"><a href="#cb37-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb37-2"><a href="#cb37-2"></a>  <span class="fu">group_by</span>(<span class="at">time_of_day =</span> <span class="fu">ifelse</span>(lubridate<span class="sc">::</span><span class="fu">am</span>(pickup_datetime), <span class="st">"morning"</span>, <span class="st">"afternoon"</span>)) <span class="sc">|&gt;</span></span>
-<span id="cb37-3"><a href="#cb37-3"></a>  <span class="fu">count</span>() <span class="sc">|&gt;</span></span>
-<span id="cb37-4"><a href="#cb37-4"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb38" data-code-line-numbers="2"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb38-1"><a href="#cb38-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb38-2"><a href="#cb38-2"></a>  <span class="fu">group_by</span>(<span class="at">time_of_day =</span> <span class="fu">ifelse</span>(lubridate<span class="sc">::</span><span class="fu">am</span>(pickup_datetime), <span class="st">"morning"</span>, <span class="st">"afternoon"</span>)) <span class="sc">|&gt;</span></span>
+<span id="cb38-3"><a href="#cb38-3"></a>  <span class="fu">count</span>() <span class="sc">|&gt;</span></span>
+<span id="cb38-4"><a href="#cb38-4"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 2 × 2
 # Groups:   time_of_day [2]
@@ -822,12 +840,12 @@ <h2>Morning vs afternoon with namespacing</h2>
 <section id="morning-vs-afternoon---without-namespacing" class="slide level2">
 <h2>Morning vs afternoon - without namespacing</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb39" data-code-line-numbers="2"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb39-1"><a href="#cb39-1"></a><span class="fu">library</span>(lubridate)</span>
-<span id="cb39-2"><a href="#cb39-2"></a></span>
-<span id="cb39-3"><a href="#cb39-3"></a>nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb39-4"><a href="#cb39-4"></a>  <span class="fu">group_by</span>(<span class="at">time_of_day =</span> <span class="fu">ifelse</span>(<span class="fu">am</span>(pickup_datetime), <span class="st">"morning"</span>, <span class="st">"afternoon"</span>)) <span class="sc">|&gt;</span></span>
-<span id="cb39-5"><a href="#cb39-5"></a>  <span class="fu">count</span>() <span class="sc">|&gt;</span></span>
-<span id="cb39-6"><a href="#cb39-6"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb40" data-code-line-numbers="4"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb40-1"><a href="#cb40-1"></a><span class="fu">library</span>(lubridate)</span>
+<span id="cb40-2"><a href="#cb40-2"></a></span>
+<span id="cb40-3"><a href="#cb40-3"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb40-4"><a href="#cb40-4"></a>  <span class="fu">group_by</span>(<span class="at">time_of_day =</span> <span class="fu">ifelse</span>(<span class="fu">am</span>(pickup_datetime), <span class="st">"morning"</span>, <span class="st">"afternoon"</span>)) <span class="sc">|&gt;</span></span>
+<span id="cb40-5"><a href="#cb40-5"></a>  <span class="fu">count</span>() <span class="sc">|&gt;</span></span>
+<span id="cb40-6"><a href="#cb40-6"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 2 × 2
 # Groups:   time_of_day [2]
@@ -838,12 +856,58 @@ <h2>Morning vs afternoon - without namespacing</h2>
 </div>
 </div>
 </section>
+<section id="what-if-a-function-isnt-implemented" class="slide level2">
+<h2>What if a function isn’t implemented?</h2>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb42"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb42-1"><a href="#cb42-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb42-2"><a href="#cb42-2"></a>  <span class="fu">mutate</span>(<span class="at">vendor_name =</span> <span class="fu">na_if</span>(vendor_name, <span class="st">"CMT"</span>)) <span class="sc">|&gt;</span></span>
+<span id="cb42-3"><a href="#cb42-3"></a>  <span class="fu">head</span>() <span class="sc">|&gt;</span></span>
+<span id="cb42-4"><a href="#cb42-4"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output cell-output-error">
+<pre><code>Error: Expression na_if(vendor_name, "CMT") not supported in Arrow
+Call collect() first to pull data into R.</code></pre>
+</div>
+</div>
+</section>
 <section id="head-to-the-docs-again-to-see-whats-implemented" class="slide level2">
 <h2>Head to the docs again to see what’s implemented!</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb41"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb41-1"><a href="#cb41-1"></a>?<span class="st">`</span><span class="at">arrow-dplyr</span><span class="st">`</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb44"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb44-1"><a href="#cb44-1"></a>?<span class="st">`</span><span class="at">arrow-dplyr</span><span class="st">`</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
-<p>or view them at <a href="https://arrow.apache.org/docs/r/reference/acero.html">https://arrow.apache.org/docs/r/reference/acero.html</a></p>
+<p>or view them at <a href="https://arrow.apache.org/docs/r/reference/acero.html" class="uri">https://arrow.apache.org/docs/r/reference/acero.html</a></p>
+</section>
+<section id="option-1---find-a-workaround" class="slide level2">
+<h2>Option 1 - find a workaround!</h2>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb45"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb45-1"><a href="#cb45-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb45-2"><a href="#cb45-2"></a>  <span class="fu">mutate</span>(<span class="at">vendor_name =</span> <span class="fu">ifelse</span>(vendor_name <span class="sc">==</span> <span class="st">"CMT"</span>, <span class="cn">NA</span>, vendor_name)) <span class="sc">|&gt;</span></span>
+<span id="cb45-3"><a href="#cb45-3"></a>  <span class="fu">head</span>() <span class="sc">|&gt;</span></span>
+<span id="cb45-4"><a href="#cb45-4"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output cell-output-stdout">
+<pre><code># A tibble: 6 × 24
+  vendor_name pickup_datetime     dropoff_datetime    passenger_count
+  &lt;chr&gt;       &lt;dttm&gt;              &lt;dttm&gt;                        &lt;int&gt;
+1 &lt;NA&gt;        2012-01-20 14:09:36 2012-01-20 14:42:25               1
+2 &lt;NA&gt;        2012-01-20 14:54:10 2012-01-20 15:06:55               1
+3 &lt;NA&gt;        2012-01-20 08:08:01 2012-01-20 08:11:02               1
+4 &lt;NA&gt;        2012-01-20 08:36:22 2012-01-20 08:39:44               1
+5 &lt;NA&gt;        2012-01-20 20:58:32 2012-01-20 21:03:04               1
+6 &lt;NA&gt;        2012-01-20 19:40:20 2012-01-20 19:43:43               2
+# ℹ 20 more variables: trip_distance &lt;dbl&gt;, pickup_longitude &lt;dbl&gt;,
+#   pickup_latitude &lt;dbl&gt;, rate_code &lt;chr&gt;, store_and_fwd &lt;chr&gt;,
+#   dropoff_longitude &lt;dbl&gt;, dropoff_latitude &lt;dbl&gt;, payment_type &lt;chr&gt;,
+#   fare_amount &lt;dbl&gt;, extra &lt;dbl&gt;, mta_tax &lt;dbl&gt;, tip_amount &lt;dbl&gt;,
+#   tolls_amount &lt;dbl&gt;, total_amount &lt;dbl&gt;, improvement_surcharge &lt;dbl&gt;,
+#   congestion_surcharge &lt;dbl&gt;, pickup_location_id &lt;int&gt;,
+#   dropoff_location_id &lt;int&gt;, year &lt;int&gt;, month &lt;int&gt;</code></pre>
+</div>
+</div>
+</section>
+<section id="option-2" class="slide level2">
+<h2>Option 2</h2>
+<ul>
+<li>In data manipulation part 2!</li>
+</ul>
 </section>
 <section id="your-turn-1" class="slide level2">
 <h2>Your Turn</h2>
@@ -854,12 +918,13 @@ <h2>Your Turn</h2>
 </ol>
 <p>➡️ <a href="2_data_manipulation_1-exercises.html">Data Manipulation Part I Exercises Page</a></p>
 </section>
-<section id="summary" class="slide level2">
+<section id="summary-1" class="slide level2">
 <h2>Summary</h2>
 <ul>
 <li>Working with Arrow datasets allow you to manipulate data which is larger-than-memory</li>
 <li>You can use many dplyr functions with arrow - run <code>?\</code>arrow-dplyr`` to view the docs</li>
 <li>You can pass data to duckdb to use functions implemented in dbplyr and duckdb but not arrow</li>
+<li>Sometimes the easiest solution is an alternative path</li>
 </ul>
 
 
diff --git a/materials/4_data_manipulation_2-exercises.html b/materials/4_data_manipulation_2-exercises.html
index 5944437..52ff237 100644
--- a/materials/4_data_manipulation_2-exercises.html
+++ b/materials/4_data_manipulation_2-exercises.html
@@ -266,13 +266,13 @@ <h1 class="title">Data Manipulation Part 2 - Exercises</h1>
 month: int32</code></pre>
 </div>
 </div>
-<div id="exercise-joins" class="callout callout-style-default callout-tip callout-titled">
+<div id="exercise-udfs" class="callout callout-style-default callout-tip callout-titled">
 <div class="callout-header d-flex align-content-center">
 <div class="callout-icon-container">
 <i class="callout-icon"></i>
 </div>
 <div class="callout-title-container flex-fill">
-Joins
+User-defined functions
 </div>
 </div>
 <div class="callout-body-container callout-body">
@@ -281,37 +281,104 @@ <h1 class="title">Data Manipulation Part 2 - Exercises</h1>
 <div class="tab-content">
 <div id="tabset-1-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-1-1-tab">
 <ol type="1">
-<li>How many taxi pickups were recorded in 2019 from the three major airports covered by the NYC Taxis data set (JFK, LaGuardia, Newark)?</li>
+<li>Write a user-defined function which wraps the <code>stringr</code> function <code>str_replace_na()</code>, and use it to replace any <code>NA</code> values in the <code>vendor_name</code> column with the string “No vendor” instead.</li>
 </ol>
 </div>
 <div id="tabset-1-2" class="tab-pane" role="tabpanel" aria-labelledby="tabset-1-2-tab">
 <div class="cell">
-<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>pickup_location <span class="ot">&lt;-</span> <span class="fu">read_csv_arrow</span>(here<span class="sc">::</span><span class="fu">here</span>(<span class="st">"data/taxi_zone_lookup.csv"</span>))</span>
-<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a>pickup_location <span class="ot">&lt;-</span> pickup_location <span class="sc">|&gt;</span></span>
-<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(</span>
-<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a>    <span class="at">pickup_location_id =</span> LocationID,</span>
-<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a>    <span class="at">borough =</span> Borough,</span>
-<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a>    <span class="at">pickup_zone =</span> Zone</span>
-<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">|&gt;</span></span>
-<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrow_table</span>(<span class="at">schema =</span> <span class="fu">schema</span>(</span>
-<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a>    <span class="at">pickup_location_id =</span> <span class="fu">int64</span>(),</span>
-<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a>    <span class="at">borough =</span> <span class="fu">utf8</span>(),</span>
-<span id="cb4-12"><a href="#cb4-12" aria-hidden="true" tabindex="-1"></a>    <span class="at">pickup_zone =</span> <span class="fu">utf8</span>()</span>
-<span id="cb4-13"><a href="#cb4-13" aria-hidden="true" tabindex="-1"></a>  ))</span>
-<span id="cb4-14"><a href="#cb4-14" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb4-15"><a href="#cb4-15" aria-hidden="true" tabindex="-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb4-16"><a href="#cb4-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2019</span>) <span class="sc">|&gt;</span></span>
-<span id="cb4-17"><a href="#cb4-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">left_join</span>(pickup_location) <span class="sc">|&gt;</span></span>
-<span id="cb4-18"><a href="#cb4-18" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(<span class="fu">str_detect</span>(pickup_zone, <span class="st">"Airport"</span>)) <span class="sc">|&gt;</span></span>
-<span id="cb4-19"><a href="#cb4-19" aria-hidden="true" tabindex="-1"></a>  <span class="fu">count</span>(pickup_zone) <span class="sc">|&gt;</span></span>
-<span id="cb4-20"><a href="#cb4-20" aria-hidden="true" tabindex="-1"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Preview the distinct vendor names before we start</span></span>
+<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2019</span>) <span class="sc">|&gt;</span> <span class="co"># smaller subset of the data</span></span>
+<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">distinct</span>(vendor_name) <span class="sc">|&gt;</span></span>
+<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output cell-output-stdout">
+<pre><code># A tibble: 3 × 1
+  vendor_name
+  &lt;chr&gt;      
+1 CMT        
+2 VTS        
+3 &lt;NA&gt;       </code></pre>
+</div>
+</div>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">register_scalar_function</span>(</span>
+<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>  <span class="at">name =</span> <span class="st">"replace_vendor_na"</span>,</span>
+<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>  <span class="cf">function</span>(context, string) {</span>
+<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a>    stringr<span class="sc">::</span><span class="fu">str_replace_na</span>(string, <span class="st">"No vendor"</span>)</span>
+<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a>  },</span>
+<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>  <span class="at">in_type =</span> <span class="fu">schema</span>(<span class="at">string =</span> <span class="fu">string</span>()),</span>
+<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a>  <span class="at">out_type =</span> <span class="fu">string</span>(),</span>
+<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a>  <span class="at">auto_convert =</span> <span class="cn">TRUE</span></span>
+<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a>)</span>
+<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a>vendor_names_fixed <span class="ot">&lt;-</span> nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">vendor_name =</span> <span class="fu">replace_vendor_na</span>(vendor_name)) </span>
+<span id="cb6-13"><a href="#cb6-13" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb6-14"><a href="#cb6-14" aria-hidden="true" tabindex="-1"></a><span class="co"># Preview the distinct vendor names to check it's worked</span></span>
+<span id="cb6-15"><a href="#cb6-15" aria-hidden="true" tabindex="-1"></a>vendor_names_fixed <span class="sc">|&gt;</span></span>
+<span id="cb6-16"><a href="#cb6-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2019</span>) <span class="sc">|&gt;</span> <span class="co"># smaller subset of the data</span></span>
+<span id="cb6-17"><a href="#cb6-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">distinct</span>(vendor_name) <span class="sc">|&gt;</span></span>
+<span id="cb6-18"><a href="#cb6-18" aria-hidden="true" tabindex="-1"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output cell-output-stdout">
+<pre><code># A tibble: 3 × 1
+  vendor_name
+  &lt;chr&gt;      
+1 CMT        
+2 VTS        
+3 No vendor  </code></pre>
+</div>
+</div>
+</div>
+</div>
+</div>
+</div>
+</div>
+<div id="exercise-joins" class="callout callout-style-default callout-tip callout-titled">
+<div class="callout-header d-flex align-content-center">
+<div class="callout-icon-container">
+<i class="callout-icon"></i>
+</div>
+<div class="callout-title-container flex-fill">
+Joins
+</div>
+</div>
+<div class="callout-body-container callout-body">
+<div class="tabset-margin-container"></div><div class="panel-tabset">
+<ul class="nav nav-tabs" role="tablist"><li class="nav-item" role="presentation"><a class="nav-link active" id="tabset-2-1-tab" data-bs-toggle="tab" data-bs-target="#tabset-2-1" role="tab" aria-controls="tabset-2-1" aria-selected="true">Problem</a></li><li class="nav-item" role="presentation"><a class="nav-link" id="tabset-2-2-tab" data-bs-toggle="tab" data-bs-target="#tabset-2-2" role="tab" aria-controls="tabset-2-2" aria-selected="false">Solution 1</a></li></ul>
+<div class="tab-content">
+<div id="tabset-2-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-2-1-tab">
+<ol type="1">
+<li>How many taxi pickups were recorded in 2019 from the three major airports covered by the NYC Taxis data set (JFK, LaGuardia, Newark)?</li>
+</ol>
+</div>
+<div id="tabset-2-2" class="tab-pane" role="tabpanel" aria-labelledby="tabset-2-2-tab">
+<div class="cell">
+<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>pickup_location <span class="ot">&lt;-</span> <span class="fu">read_csv_arrow</span>(here<span class="sc">::</span><span class="fu">here</span>(<span class="st">"data/taxi_zone_lookup.csv"</span>))</span>
+<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>pickup_location <span class="ot">&lt;-</span> pickup_location <span class="sc">|&gt;</span></span>
+<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(</span>
+<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a>    <span class="at">pickup_location_id =</span> LocationID,</span>
+<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a>    <span class="at">borough =</span> Borough,</span>
+<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a>    <span class="at">pickup_zone =</span> Zone</span>
+<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">|&gt;</span></span>
+<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrow_table</span>(<span class="at">schema =</span> <span class="fu">schema</span>(</span>
+<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a>    <span class="at">pickup_location_id =</span> <span class="fu">int64</span>(),</span>
+<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a>    <span class="at">borough =</span> <span class="fu">utf8</span>(),</span>
+<span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a>    <span class="at">pickup_zone =</span> <span class="fu">utf8</span>()</span>
+<span id="cb8-13"><a href="#cb8-13" aria-hidden="true" tabindex="-1"></a>  ))</span>
+<span id="cb8-14"><a href="#cb8-14" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb8-15"><a href="#cb8-15" aria-hidden="true" tabindex="-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb8-16"><a href="#cb8-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2019</span>) <span class="sc">|&gt;</span></span>
+<span id="cb8-17"><a href="#cb8-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">left_join</span>(pickup_location) <span class="sc">|&gt;</span></span>
+<span id="cb8-18"><a href="#cb8-18" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(<span class="fu">str_detect</span>(pickup_zone, <span class="st">"Airport"</span>)) <span class="sc">|&gt;</span></span>
+<span id="cb8-19"><a href="#cb8-19" aria-hidden="true" tabindex="-1"></a>  <span class="fu">count</span>(pickup_zone) <span class="sc">|&gt;</span></span>
+<span id="cb8-20"><a href="#cb8-20" aria-hidden="true" tabindex="-1"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 3 × 2
   pickup_zone             n
   &lt;chr&gt;               &lt;int&gt;
-1 LaGuardia Airport 2159224
-2 JFK Airport       2729336
+1 JFK Airport       2729336
+2 LaGuardia Airport 2159224
 3 Newark Airport       8643</code></pre>
 </div>
 </div>
@@ -331,25 +398,25 @@ <h1 class="title">Data Manipulation Part 2 - Exercises</h1>
 </div>
 <div class="callout-body-container callout-body">
 <div class="tabset-margin-container"></div><div class="panel-tabset">
-<ul class="nav nav-tabs" role="tablist"><li class="nav-item" role="presentation"><a class="nav-link active" id="tabset-2-1-tab" data-bs-toggle="tab" data-bs-target="#tabset-2-1" role="tab" aria-controls="tabset-2-1" aria-selected="true">Problem</a></li><li class="nav-item" role="presentation"><a class="nav-link" id="tabset-2-2-tab" data-bs-toggle="tab" data-bs-target="#tabset-2-2" role="tab" aria-controls="tabset-2-2" aria-selected="false">Solution 1</a></li></ul>
+<ul class="nav nav-tabs" role="tablist"><li class="nav-item" role="presentation"><a class="nav-link active" id="tabset-3-1-tab" data-bs-toggle="tab" data-bs-target="#tabset-3-1" role="tab" aria-controls="tabset-3-1" aria-selected="true">Problem</a></li><li class="nav-item" role="presentation"><a class="nav-link" id="tabset-3-2-tab" data-bs-toggle="tab" data-bs-target="#tabset-3-2" role="tab" aria-controls="tabset-3-2" aria-selected="false">Solution 1</a></li></ul>
 <div class="tab-content">
-<div id="tabset-2-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-2-1-tab">
+<div id="tabset-3-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-3-1-tab">
 <ol type="1">
 <li>How many trips in September 2019 had a longer than average distance for that month?</li>
 </ol>
 </div>
-<div id="tabset-2-2" class="tab-pane" role="tabpanel" aria-labelledby="tabset-2-2-tab">
+<div id="tabset-3-2" class="tab-pane" role="tabpanel" aria-labelledby="tabset-3-2-tab">
 <section id="option-1---via-duckdb" class="level3">
 <h3 class="anchored" data-anchor-id="option-1---via-duckdb">Option 1 - via DuckDB</h3>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2019</span>, month <span class="sc">==</span> <span class="dv">9</span>) <span class="sc">|&gt;</span></span>
-<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">to_duckdb</span>() <span class="sc">|&gt;</span></span>
-<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">mean_distance =</span> <span class="fu">mean</span>(trip_distance)) <span class="sc">|&gt;</span></span>
-<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">to_arrow</span>() <span class="sc">|&gt;</span></span>
-<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(trip_distance <span class="sc">&lt;</span> mean_distance) <span class="sc">|&gt;</span></span>
-<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">count</span>() <span class="sc">|&gt;</span></span>
-<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2019</span>, month <span class="sc">==</span> <span class="dv">9</span>) <span class="sc">|&gt;</span></span>
+<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">to_duckdb</span>() <span class="sc">|&gt;</span></span>
+<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">mean_distance =</span> <span class="fu">mean</span>(trip_distance)) <span class="sc">|&gt;</span></span>
+<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">to_arrow</span>() <span class="sc">|&gt;</span></span>
+<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(trip_distance <span class="sc">&lt;</span> mean_distance) <span class="sc">|&gt;</span></span>
+<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">count</span>() <span class="sc">|&gt;</span></span>
+<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 1 × 1
         n
@@ -361,17 +428,17 @@ <h3 class="anchored" data-anchor-id="option-1---via-duckdb">Option 1 - via DuckD
 <section id="option-2---via-a-join" class="level3">
 <h3 class="anchored" data-anchor-id="option-2---via-a-join">Option 2 - via a join</h3>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2019</span>, month <span class="sc">==</span> <span class="dv">9</span>) <span class="sc">|&gt;</span></span>
-<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">left_join</span>(</span>
-<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a>    nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a>      <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2019</span>, month <span class="sc">==</span> <span class="dv">9</span>) <span class="sc">|&gt;</span></span>
-<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a>      <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
-<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a>      <span class="fu">summarise</span>(<span class="at">mean_distance =</span> <span class="fu">mean</span>(trip_distance))</span>
-<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a>    ) <span class="sc">|&gt;</span></span>
-<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(trip_distance <span class="sc">&lt;</span> mean_distance) <span class="sc">|&gt;</span></span>
-<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">count</span>() <span class="sc">|&gt;</span></span>
-<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2019</span>, month <span class="sc">==</span> <span class="dv">9</span>) <span class="sc">|&gt;</span></span>
+<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">left_join</span>(</span>
+<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a>    nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a>      <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2019</span>, month <span class="sc">==</span> <span class="dv">9</span>) <span class="sc">|&gt;</span></span>
+<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a>      <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
+<span id="cb12-7"><a href="#cb12-7" aria-hidden="true" tabindex="-1"></a>      <span class="fu">summarise</span>(<span class="at">mean_distance =</span> <span class="fu">mean</span>(trip_distance))</span>
+<span id="cb12-8"><a href="#cb12-8" aria-hidden="true" tabindex="-1"></a>    ) <span class="sc">|&gt;</span></span>
+<span id="cb12-9"><a href="#cb12-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(trip_distance <span class="sc">&lt;</span> mean_distance) <span class="sc">|&gt;</span></span>
+<span id="cb12-10"><a href="#cb12-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">count</span>() <span class="sc">|&gt;</span></span>
+<span id="cb12-11"><a href="#cb12-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 1 × 1
         n
diff --git a/materials/4_data_manipulation_2.html b/materials/4_data_manipulation_2.html
index 980cd59..3452fe3 100644
--- a/materials/4_data_manipulation_2.html
+++ b/materials/4_data_manipulation_2.html
@@ -389,11 +389,174 @@
     <div class="slides">
 
 
+<section>
 <section id="data-manip-2" class="title-slide slide level1 center">
 <h1>Data Manipulation—Part 2</h1>
 
 </section>
-
+<section id="what-if-a-function-binding-doesnt-exist---revisited" class="slide level2">
+<h2>What if a function binding doesn’t exist - revisited!</h2>
+<ul>
+<li>Option 1 - find a workaround</li>
+<li>Option 2 - user-defined functions (UDFs)</li>
+</ul>
+</section>
+<section id="why-use-a-udf" class="slide level2">
+<h2>Why use a UDF?</h2>
+<p>Sometimes it’s hard to find a workaround</p>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb1-2"><a href="#cb1-2"></a>  <span class="fu">mutate</span>(<span class="at">duration_minutes =</span> <span class="fu">difftime</span>(pickup_datetime, dropoff_datetime, <span class="at">units =</span> <span class="st">"minutes"</span>)) <span class="sc">|&gt;</span></span>
+<span id="cb1-3"><a href="#cb1-3"></a>  <span class="fu">select</span>(pickup_datetime, dropoff_datetime, duration_minutes) <span class="sc">|&gt;</span></span>
+<span id="cb1-4"><a href="#cb1-4"></a>  <span class="fu">head</span>() <span class="sc">|&gt;</span></span>
+<span id="cb1-5"><a href="#cb1-5"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output cell-output-error">
+<pre><code>Error: In difftime(pickup_datetime, dropoff_datetime, units = "minutes"), `difftime()` with units other than `secs` not supported in Arrow
+Call collect() first to pull data into R.</code></pre>
+</div>
+</div>
+</section></section>
+<section>
+<section id="user-defined-functions-aka-udfs" class="title-slide slide level1 center">
+<h1>User-defined functions (aka UDFs)</h1>
+<ul>
+<li>Define your own functions</li>
+<li>Scalar functions only - each input row produces exactly one output value</li>
+</ul>
+</section>
+<section id="user-defined-functions---definition" class="slide level2">
+<h2>User-defined functions - definition</h2>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1"></a><span class="fu">register_scalar_function</span>(</span>
+<span id="cb3-2"><a href="#cb3-2"></a>  <span class="at">name =</span> <span class="st">"time_diff_minutes"</span>,</span>
+<span id="cb3-3"><a href="#cb3-3"></a>  <span class="cf">function</span>(context, pickup, dropoff) {</span>
+<span id="cb3-4"><a href="#cb3-4"></a>    <span class="fu">difftime</span>(dropoff, pickup, <span class="at">units =</span> <span class="st">"mins"</span>) <span class="sc">|&gt;</span></span>
+<span id="cb3-5"><a href="#cb3-5"></a>      <span class="fu">round</span>() <span class="sc">|&gt;</span></span>
+<span id="cb3-6"><a href="#cb3-6"></a>      <span class="fu">as.integer</span>()</span>
+<span id="cb3-7"><a href="#cb3-7"></a>  },</span>
+<span id="cb3-8"><a href="#cb3-8"></a>  <span class="at">in_type =</span> <span class="fu">schema</span>(</span>
+<span id="cb3-9"><a href="#cb3-9"></a>    <span class="at">pickup =</span> <span class="fu">timestamp</span>(<span class="at">unit =</span> <span class="st">"ms"</span>),</span>
+<span id="cb3-10"><a href="#cb3-10"></a>    <span class="at">dropoff =</span> <span class="fu">timestamp</span>(<span class="at">unit =</span> <span class="st">"ms"</span>)</span>
+<span id="cb3-11"><a href="#cb3-11"></a>  ),</span>
+<span id="cb3-12"><a href="#cb3-12"></a>  <span class="at">out_type =</span> <span class="fu">int32</span>(),</span>
+<span id="cb3-13"><a href="#cb3-13"></a>  <span class="at">auto_convert =</span> <span class="cn">TRUE</span></span>
+<span id="cb3-14"><a href="#cb3-14"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</section>
+<section id="user-defined-functions---definition-1" class="slide level2">
+<h2>User-defined functions - definition</h2>
+<p>Give the function a name</p>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb4" data-code-line-numbers="2"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a><span class="fu">register_scalar_function</span>(</span>
+<span id="cb4-2"><a href="#cb4-2"></a>  <span class="at">name =</span> <span class="st">"time_diff_minutes"</span>,</span>
+<span id="cb4-3"><a href="#cb4-3"></a>  <span class="cf">function</span>(context, pickup, dropoff) {</span>
+<span id="cb4-4"><a href="#cb4-4"></a>    <span class="fu">difftime</span>(dropoff, pickup, <span class="at">units =</span> <span class="st">"mins"</span>) <span class="sc">|&gt;</span></span>
+<span id="cb4-5"><a href="#cb4-5"></a>      <span class="fu">round</span>() <span class="sc">|&gt;</span></span>
+<span id="cb4-6"><a href="#cb4-6"></a>      <span class="fu">as.integer</span>()</span>
+<span id="cb4-7"><a href="#cb4-7"></a>  },</span>
+<span id="cb4-8"><a href="#cb4-8"></a>  <span class="at">in_type =</span> <span class="fu">schema</span>(</span>
+<span id="cb4-9"><a href="#cb4-9"></a>    <span class="at">pickup =</span> <span class="fu">timestamp</span>(<span class="at">unit =</span> <span class="st">"ms"</span>),</span>
+<span id="cb4-10"><a href="#cb4-10"></a>    <span class="at">dropoff =</span> <span class="fu">timestamp</span>(<span class="at">unit =</span> <span class="st">"ms"</span>)</span>
+<span id="cb4-11"><a href="#cb4-11"></a>  ),</span>
+<span id="cb4-12"><a href="#cb4-12"></a>  <span class="at">out_type =</span> <span class="fu">int32</span>(),</span>
+<span id="cb4-13"><a href="#cb4-13"></a>  <span class="at">auto_convert =</span> <span class="cn">TRUE</span></span>
+<span id="cb4-14"><a href="#cb4-14"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</section>
+<section id="user-defined-functions---definition-2" class="slide level2">
+<h2>User-defined functions - definition</h2>
+<p>Define the body of the function - first argument <em>must</em> be <code>context</code></p>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb5" data-code-line-numbers="3,4,5,6,7"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1"></a><span class="fu">register_scalar_function</span>(</span>
+<span id="cb5-2"><a href="#cb5-2"></a>  <span class="at">name =</span> <span class="st">"time_diff_minutes"</span>,</span>
+<span id="cb5-3"><a href="#cb5-3"></a>  <span class="cf">function</span>(context, pickup, dropoff) {</span>
+<span id="cb5-4"><a href="#cb5-4"></a>    <span class="fu">difftime</span>(dropoff, pickup, <span class="at">units =</span> <span class="st">"mins"</span>) <span class="sc">|&gt;</span></span>
+<span id="cb5-5"><a href="#cb5-5"></a>      <span class="fu">round</span>() <span class="sc">|&gt;</span></span>
+<span id="cb5-6"><a href="#cb5-6"></a>      <span class="fu">as.integer</span>()</span>
+<span id="cb5-7"><a href="#cb5-7"></a>  },</span>
+<span id="cb5-8"><a href="#cb5-8"></a>  <span class="at">in_type =</span> <span class="fu">schema</span>(</span>
+<span id="cb5-9"><a href="#cb5-9"></a>    <span class="at">pickup =</span> <span class="fu">timestamp</span>(<span class="at">unit =</span> <span class="st">"ms"</span>),</span>
+<span id="cb5-10"><a href="#cb5-10"></a>    <span class="at">dropoff =</span> <span class="fu">timestamp</span>(<span class="at">unit =</span> <span class="st">"ms"</span>)</span>
+<span id="cb5-11"><a href="#cb5-11"></a>  ),</span>
+<span id="cb5-12"><a href="#cb5-12"></a>  <span class="at">out_type =</span> <span class="fu">int32</span>(),</span>
+<span id="cb5-13"><a href="#cb5-13"></a>  <span class="at">auto_convert =</span> <span class="cn">TRUE</span></span>
+<span id="cb5-14"><a href="#cb5-14"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</section>
+<section id="user-defined-functions---definition-3" class="slide level2">
+<h2>User-defined functions - definition</h2>
+<p>Define the schema of the input arguments</p>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb6" data-code-line-numbers="8,9,10,11"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1"></a><span class="fu">register_scalar_function</span>(</span>
+<span id="cb6-2"><a href="#cb6-2"></a>  <span class="at">name =</span> <span class="st">"time_diff_minutes"</span>,</span>
+<span id="cb6-3"><a href="#cb6-3"></a>  <span class="cf">function</span>(context, pickup, dropoff) {</span>
+<span id="cb6-4"><a href="#cb6-4"></a>    <span class="fu">difftime</span>(dropoff, pickup, <span class="at">units =</span> <span class="st">"mins"</span>) <span class="sc">|&gt;</span></span>
+<span id="cb6-5"><a href="#cb6-5"></a>      <span class="fu">round</span>() <span class="sc">|&gt;</span></span>
+<span id="cb6-6"><a href="#cb6-6"></a>      <span class="fu">as.integer</span>()</span>
+<span id="cb6-7"><a href="#cb6-7"></a>  },</span>
+<span id="cb6-8"><a href="#cb6-8"></a>  <span class="at">in_type =</span> <span class="fu">schema</span>(</span>
+<span id="cb6-9"><a href="#cb6-9"></a>    <span class="at">pickup =</span> <span class="fu">timestamp</span>(<span class="at">unit =</span> <span class="st">"ms"</span>),</span>
+<span id="cb6-10"><a href="#cb6-10"></a>    <span class="at">dropoff =</span> <span class="fu">timestamp</span>(<span class="at">unit =</span> <span class="st">"ms"</span>)</span>
+<span id="cb6-11"><a href="#cb6-11"></a>  ),</span>
+<span id="cb6-12"><a href="#cb6-12"></a>  <span class="at">out_type =</span> <span class="fu">int32</span>(),</span>
+<span id="cb6-13"><a href="#cb6-13"></a>  <span class="at">auto_convert =</span> <span class="cn">TRUE</span></span>
+<span id="cb6-14"><a href="#cb6-14"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</section>
+<section id="user-defined-functions---definition-4" class="slide level2">
+<h2>User-defined functions - definition</h2>
+<p>Define the data type of the output</p>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb7" data-code-line-numbers="12"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1"></a><span class="fu">register_scalar_function</span>(</span>
+<span id="cb7-2"><a href="#cb7-2"></a>  <span class="at">name =</span> <span class="st">"time_diff_minutes"</span>,</span>
+<span id="cb7-3"><a href="#cb7-3"></a>  <span class="cf">function</span>(context, pickup, dropoff) {</span>
+<span id="cb7-4"><a href="#cb7-4"></a>    <span class="fu">difftime</span>(dropoff, pickup, <span class="at">units =</span> <span class="st">"mins"</span>) <span class="sc">|&gt;</span></span>
+<span id="cb7-5"><a href="#cb7-5"></a>      <span class="fu">round</span>() <span class="sc">|&gt;</span></span>
+<span id="cb7-6"><a href="#cb7-6"></a>      <span class="fu">as.integer</span>()</span>
+<span id="cb7-7"><a href="#cb7-7"></a>  },</span>
+<span id="cb7-8"><a href="#cb7-8"></a>  <span class="at">in_type =</span> <span class="fu">schema</span>(</span>
+<span id="cb7-9"><a href="#cb7-9"></a>    <span class="at">pickup =</span> <span class="fu">timestamp</span>(<span class="at">unit =</span> <span class="st">"ms"</span>),</span>
+<span id="cb7-10"><a href="#cb7-10"></a>    <span class="at">dropoff =</span> <span class="fu">timestamp</span>(<span class="at">unit =</span> <span class="st">"ms"</span>)</span>
+<span id="cb7-11"><a href="#cb7-11"></a>  ),</span>
+<span id="cb7-12"><a href="#cb7-12"></a>  <span class="at">out_type =</span> <span class="fu">int32</span>(),</span>
+<span id="cb7-13"><a href="#cb7-13"></a>  <span class="at">auto_convert =</span> <span class="cn">TRUE</span></span>
+<span id="cb7-14"><a href="#cb7-14"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</section>
+<section id="user-defined-functions---usage" class="slide level2">
+<h2>User-defined functions - usage</h2>
+<div class="cell">
+<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb8-2"><a href="#cb8-2"></a>  <span class="fu">mutate</span>(<span class="at">duration_minutes =</span> <span class="fu">time_diff_minutes</span>(pickup_datetime, dropoff_datetime)) <span class="sc">|&gt;</span></span>
+<span id="cb8-3"><a href="#cb8-3"></a>  <span class="fu">select</span>(pickup_datetime, dropoff_datetime, duration_minutes) <span class="sc">|&gt;</span></span>
+<span id="cb8-4"><a href="#cb8-4"></a>  <span class="fu">head</span>() <span class="sc">|&gt;</span></span>
+<span id="cb8-5"><a href="#cb8-5"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="cell-output cell-output-stdout">
+<pre><code># A tibble: 6 × 3
+  pickup_datetime     dropoff_datetime    duration_minutes
+  &lt;dttm&gt;              &lt;dttm&gt;                         &lt;int&gt;
+1 2012-11-02 23:40:32 2012-11-02 23:58:16               18
+2 2012-11-02 23:40:41 2012-11-02 23:45:56                5
+3 2012-11-02 23:40:50 2012-11-02 23:49:20                8
+4 2012-11-02 23:40:52 2012-11-02 23:46:15                5
+5 2012-11-02 23:41:00 2012-11-02 23:44:00                3
+6 2012-11-02 23:41:00 2012-11-02 23:45:00                4</code></pre>
+</div>
+</div>
+</section>
+<section id="your-turn" class="slide level2">
+<h2>Your Turn</h2>
+<ol type="1">
+<li>Write a user-defined function which wraps the <code>stringr</code> function <code>str_replace_na()</code>, and use it to replace any <code>NA</code> values in the <code>vendor_name</code> column with the string “No vendor” instead.</li>
+</ol>
+<p>➡️ <a href="4_data_manipulation_2-exercises.html">Data Manipulation Part 2 Exercises Page</a></p>
+</section>
+<section id="summary" class="slide level2">
+<h2>Summary</h2>
+<ul>
+<li>You can use UDFs to create your own bindings when they don’t exist!</li>
+</ul>
+</section></section>
 <section>
 <section id="joins" class="title-slide slide level1 center">
 <h1>Joins</h1>
@@ -402,39 +565,39 @@ <h1>Joins</h1>
 <section id="joining-a-reference-table" class="slide level2">
 <h2>Joining a reference table</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1"></a>vendors <span class="ot">&lt;-</span> tibble<span class="sc">::</span><span class="fu">tibble</span>(</span>
-<span id="cb1-2"><a href="#cb1-2"></a>  <span class="at">code =</span> <span class="fu">c</span>(<span class="st">"VTS"</span>, <span class="st">"CMT"</span>, <span class="st">"DDS"</span>),</span>
-<span id="cb1-3"><a href="#cb1-3"></a>  <span class="at">full_name =</span> <span class="fu">c</span>(</span>
-<span id="cb1-4"><a href="#cb1-4"></a>    <span class="st">"Verifone Transportation Systems"</span>,</span>
-<span id="cb1-5"><a href="#cb1-5"></a>    <span class="st">"Creative Mobile Technologies"</span>,</span>
-<span id="cb1-6"><a href="#cb1-6"></a>    <span class="st">"Digital Dispatch Systems"</span></span>
-<span id="cb1-7"><a href="#cb1-7"></a>  )</span>
-<span id="cb1-8"><a href="#cb1-8"></a>)</span>
-<span id="cb1-9"><a href="#cb1-9"></a></span>
-<span id="cb1-10"><a href="#cb1-10"></a>nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb1-11"><a href="#cb1-11"></a>  <span class="fu">left_join</span>(vendors, <span class="at">by =</span> <span class="fu">c</span>(<span class="st">"vendor_name"</span> <span class="ot">=</span> <span class="st">"code"</span>)) <span class="sc">|&gt;</span></span>
-<span id="cb1-12"><a href="#cb1-12"></a>  <span class="fu">select</span>(vendor_name, full_name, pickup_datetime) <span class="sc">|&gt;</span></span>
-<span id="cb1-13"><a href="#cb1-13"></a>  <span class="fu">head</span>(<span class="dv">3</span>) <span class="sc">|&gt;</span></span>
-<span id="cb1-14"><a href="#cb1-14"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1"></a>vendors <span class="ot">&lt;-</span> tibble<span class="sc">::</span><span class="fu">tibble</span>(</span>
+<span id="cb10-2"><a href="#cb10-2"></a>  <span class="at">code =</span> <span class="fu">c</span>(<span class="st">"VTS"</span>, <span class="st">"CMT"</span>, <span class="st">"DDS"</span>),</span>
+<span id="cb10-3"><a href="#cb10-3"></a>  <span class="at">full_name =</span> <span class="fu">c</span>(</span>
+<span id="cb10-4"><a href="#cb10-4"></a>    <span class="st">"Verifone Transportation Systems"</span>,</span>
+<span id="cb10-5"><a href="#cb10-5"></a>    <span class="st">"Creative Mobile Technologies"</span>,</span>
+<span id="cb10-6"><a href="#cb10-6"></a>    <span class="st">"Digital Dispatch Systems"</span></span>
+<span id="cb10-7"><a href="#cb10-7"></a>  )</span>
+<span id="cb10-8"><a href="#cb10-8"></a>)</span>
+<span id="cb10-9"><a href="#cb10-9"></a></span>
+<span id="cb10-10"><a href="#cb10-10"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb10-11"><a href="#cb10-11"></a>  <span class="fu">left_join</span>(vendors, <span class="at">by =</span> <span class="fu">c</span>(<span class="st">"vendor_name"</span> <span class="ot">=</span> <span class="st">"code"</span>)) <span class="sc">|&gt;</span></span>
+<span id="cb10-12"><a href="#cb10-12"></a>  <span class="fu">select</span>(vendor_name, full_name, pickup_datetime) <span class="sc">|&gt;</span></span>
+<span id="cb10-13"><a href="#cb10-13"></a>  <span class="fu">head</span>(<span class="dv">3</span>) <span class="sc">|&gt;</span></span>
+<span id="cb10-14"><a href="#cb10-14"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 3 × 3
   vendor_name full_name                    pickup_datetime    
   &lt;chr&gt;       &lt;chr&gt;                        &lt;dttm&gt;             
-1 CMT         Creative Mobile Technologies 2012-11-03 10:08:31
-2 CMT         Creative Mobile Technologies 2012-11-03 10:08:35
-3 CMT         Creative Mobile Technologies 2012-11-03 10:08:35</code></pre>
+1 CMT         Creative Mobile Technologies 2012-01-27 23:35:26
+2 CMT         Creative Mobile Technologies 2012-01-27 14:56:04
+3 CMT         Creative Mobile Technologies 2012-01-27 16:12:50</code></pre>
 </div>
 </div>
 </section>
 <section id="traps-for-the-unwary" class="slide level2">
 <h2>Traps for the unwary</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1"></a>nyc_taxi_zones <span class="ot">&lt;-</span></span>
-<span id="cb3-2"><a href="#cb3-2"></a>  <span class="fu">read_csv_arrow</span>(here<span class="sc">::</span><span class="fu">here</span>(<span class="st">"data/taxi_zone_lookup.csv"</span>)) <span class="sc">|&gt;</span></span>
-<span id="cb3-3"><a href="#cb3-3"></a>  <span class="fu">select</span>(<span class="at">location_id =</span> LocationID,</span>
-<span id="cb3-4"><a href="#cb3-4"></a>         <span class="at">borough =</span> Borough)</span>
-<span id="cb3-5"><a href="#cb3-5"></a></span>
-<span id="cb3-6"><a href="#cb3-6"></a>nyc_taxi_zones</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1"></a>nyc_taxi_zones <span class="ot">&lt;-</span></span>
+<span id="cb12-2"><a href="#cb12-2"></a>  <span class="fu">read_csv_arrow</span>(here<span class="sc">::</span><span class="fu">here</span>(<span class="st">"data/taxi_zone_lookup.csv"</span>)) <span class="sc">|&gt;</span></span>
+<span id="cb12-3"><a href="#cb12-3"></a>  <span class="fu">select</span>(<span class="at">location_id =</span> LocationID,</span>
+<span id="cb12-4"><a href="#cb12-4"></a>         <span class="at">borough =</span> Borough)</span>
+<span id="cb12-5"><a href="#cb12-5"></a></span>
+<span id="cb12-6"><a href="#cb12-6"></a>nyc_taxi_zones</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 265 × 2
    location_id borough      
@@ -456,19 +619,21 @@ <h2>Traps for the unwary</h2>
 <section id="why-didnt-this-work" class="slide level2">
 <h2>Why didn’t this work?</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb5-2"><a href="#cb5-2"></a>  <span class="fu">left_join</span>(nyc_taxi_zones, <span class="at">by =</span> <span class="fu">c</span>(<span class="st">"pickup_location_id"</span> <span class="ot">=</span> <span class="st">"location_id"</span>)) <span class="sc">|&gt;</span></span>
-<span id="cb5-3"><a href="#cb5-3"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1"></a>nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb14-2"><a href="#cb14-2"></a>  <span class="fu">left_join</span>(nyc_taxi_zones, <span class="at">by =</span> <span class="fu">c</span>(<span class="st">"pickup_location_id"</span> <span class="ot">=</span> <span class="st">"location_id"</span>)) <span class="sc">|&gt;</span></span>
+<span id="cb14-3"><a href="#cb14-3"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-error">
 <pre><code>Error in `compute.arrow_dplyr_query()`:
-! Invalid: Incompatible data types for corresponding join field keys: FieldRef.Name(pickup_location_id) of type int64 and FieldRef.Name(location_id) of type int32</code></pre>
+! Invalid: Incompatible data types for corresponding join field keys: FieldRef.Name(pickup_location_id) of type int64 and FieldRef.Name(location_id) of type int32
+/home/nic/arrow/cpp/src/arrow/acero/hash_join_node.cc:131  ValidateSchemas(join_type, left_schema, left_keys, left_output, right_schema, right_keys, right_output, left_field_name_suffix, right_field_name_suffix)
+/home/nic/arrow/cpp/src/arrow/acero/hash_join_node.cc:724  schema_mgr-&gt;Init( join_options.join_type, left_schema, join_options.left_keys, join_options.left_output, right_schema, join_options.right_keys, join_options.right_output, join_options.filter, join_options.output_suffix_for_left, join_options.output_suffix_for_right)</code></pre>
 </div>
 </div>
 </section>
 <section id="schema-for-the-nyc_taxi-table" class="slide level2">
 <h2>Schema for the <code>nyc_taxi</code> table</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1"></a>nyc_taxi<span class="sc">$</span>schema</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1"></a>nyc_taxi<span class="sc">$</span>schema</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code>Schema
 vendor_name: string
@@ -501,7 +666,7 @@ <h2>Schema for the <code>nyc_taxi</code> table</h2>
 <section id="schema-for-the-nyc_taxi_zones-table" class="slide level2">
 <h2>Schema for the <code>nyc_taxi_zones</code> table</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1"></a><span class="fu">arrow_table</span>(nyc_taxi_zones)<span class="sc">$</span>schema</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1"></a><span class="fu">arrow_table</span>(nyc_taxi_zones)<span class="sc">$</span>schema</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code>Schema
 location_id: int32
@@ -516,10 +681,10 @@ <h2>Schema for the <code>nyc_taxi_zones</code> table</h2>
 <section id="take-control-of-the-schema" class="slide level2">
 <h2>Take control of the schema</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1"></a>nyc_taxi_zones_arrow <span class="ot">&lt;-</span> <span class="fu">arrow_table</span>(</span>
-<span id="cb11-2"><a href="#cb11-2"></a>  nyc_taxi_zones, </span>
-<span id="cb11-3"><a href="#cb11-3"></a>  <span class="at">schema =</span> <span class="fu">schema</span>(<span class="at">location_id =</span> <span class="fu">int64</span>(), <span class="at">borough =</span> <span class="fu">utf8</span>())</span>
-<span id="cb11-4"><a href="#cb11-4"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb20"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1"></a>nyc_taxi_zones_arrow <span class="ot">&lt;-</span> <span class="fu">arrow_table</span>(</span>
+<span id="cb20-2"><a href="#cb20-2"></a>  nyc_taxi_zones, </span>
+<span id="cb20-3"><a href="#cb20-3"></a>  <span class="at">schema =</span> <span class="fu">schema</span>(<span class="at">location_id =</span> <span class="fu">int64</span>(), <span class="at">borough =</span> <span class="fu">utf8</span>())</span>
+<span id="cb20-4"><a href="#cb20-4"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <ul>
 <li><code>schema()</code> takes variable name / types as input</li>
@@ -529,11 +694,11 @@ <h2>Take control of the schema</h2>
 <section id="take-control-of-the-schema-1" class="slide level2">
 <h2>Take control of the schema</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1"></a>nyc_taxi_zones_arrow <span class="ot">&lt;-</span> <span class="fu">arrow_table</span>(</span>
-<span id="cb12-2"><a href="#cb12-2"></a>  nyc_taxi_zones, </span>
-<span id="cb12-3"><a href="#cb12-3"></a>  <span class="at">schema =</span> <span class="fu">schema</span>(<span class="at">location_id =</span> <span class="fu">int64</span>(), <span class="at">borough =</span> <span class="fu">utf8</span>())</span>
-<span id="cb12-4"><a href="#cb12-4"></a>)</span>
-<span id="cb12-5"><a href="#cb12-5"></a>nyc_taxi_zones_arrow<span class="sc">$</span>schema</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1"></a>nyc_taxi_zones_arrow <span class="ot">&lt;-</span> <span class="fu">arrow_table</span>(</span>
+<span id="cb21-2"><a href="#cb21-2"></a>  nyc_taxi_zones, </span>
+<span id="cb21-3"><a href="#cb21-3"></a>  <span class="at">schema =</span> <span class="fu">schema</span>(<span class="at">location_id =</span> <span class="fu">int64</span>(), <span class="at">borough =</span> <span class="fu">utf8</span>())</span>
+<span id="cb21-4"><a href="#cb21-4"></a>)</span>
+<span id="cb21-5"><a href="#cb21-5"></a>nyc_taxi_zones_arrow<span class="sc">$</span>schema</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code>Schema
 location_id: int64
@@ -544,13 +709,13 @@ <h2>Take control of the schema</h2>
 <section id="prepare-the-auxiliary-tables" class="slide level2">
 <h2>Prepare the auxiliary tables</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1"></a>pickup <span class="ot">&lt;-</span> nyc_taxi_zones_arrow <span class="sc">|&gt;</span></span>
-<span id="cb14-2"><a href="#cb14-2"></a>  <span class="fu">select</span>(<span class="at">pickup_location_id =</span> location_id,</span>
-<span id="cb14-3"><a href="#cb14-3"></a>         <span class="at">pickup_borough =</span> borough)</span>
-<span id="cb14-4"><a href="#cb14-4"></a></span>
-<span id="cb14-5"><a href="#cb14-5"></a>dropoff <span class="ot">&lt;-</span> nyc_taxi_zones_arrow <span class="sc">|&gt;</span></span>
-<span id="cb14-6"><a href="#cb14-6"></a>  <span class="fu">select</span>(<span class="at">dropoff_location_id =</span> location_id,</span>
-<span id="cb14-7"><a href="#cb14-7"></a>         <span class="at">dropoff_borough =</span> borough)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb23"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1"></a>pickup <span class="ot">&lt;-</span> nyc_taxi_zones_arrow <span class="sc">|&gt;</span></span>
+<span id="cb23-2"><a href="#cb23-2"></a>  <span class="fu">select</span>(<span class="at">pickup_location_id =</span> location_id,</span>
+<span id="cb23-3"><a href="#cb23-3"></a>         <span class="at">pickup_borough =</span> borough)</span>
+<span id="cb23-4"><a href="#cb23-4"></a></span>
+<span id="cb23-5"><a href="#cb23-5"></a>dropoff <span class="ot">&lt;-</span> nyc_taxi_zones_arrow <span class="sc">|&gt;</span></span>
+<span id="cb23-6"><a href="#cb23-6"></a>  <span class="fu">select</span>(<span class="at">dropoff_location_id =</span> location_id,</span>
+<span id="cb23-7"><a href="#cb23-7"></a>         <span class="at">dropoff_borough =</span> borough)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <ul>
 <li>Join separately for the pickup and dropoff zones</li>
@@ -564,18 +729,18 @@ <h2>Prepare the auxiliary tables</h2>
 <section id="join-and-cross-tabulate" class="slide level2">
 <h2>Join and cross-tabulate</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1"></a><span class="fu">library</span>(tictoc)</span>
-<span id="cb15-2"><a href="#cb15-2"></a></span>
-<span id="cb15-3"><a href="#cb15-3"></a><span class="fu">tic</span>()</span>
-<span id="cb15-4"><a href="#cb15-4"></a>borough_counts <span class="ot">&lt;-</span> nyc_taxi <span class="sc">|&gt;</span> </span>
-<span id="cb15-5"><a href="#cb15-5"></a>  <span class="fu">left_join</span>(pickup) <span class="sc">|&gt;</span></span>
-<span id="cb15-6"><a href="#cb15-6"></a>  <span class="fu">left_join</span>(dropoff) <span class="sc">|&gt;</span></span>
-<span id="cb15-7"><a href="#cb15-7"></a>  <span class="fu">count</span>(pickup_borough, dropoff_borough) <span class="sc">|&gt;</span></span>
-<span id="cb15-8"><a href="#cb15-8"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(n)) <span class="sc">|&gt;</span></span>
-<span id="cb15-9"><a href="#cb15-9"></a>  <span class="fu">collect</span>()</span>
-<span id="cb15-10"><a href="#cb15-10"></a><span class="fu">toc</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb24"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1"></a><span class="fu">library</span>(tictoc)</span>
+<span id="cb24-2"><a href="#cb24-2"></a></span>
+<span id="cb24-3"><a href="#cb24-3"></a><span class="fu">tic</span>()</span>
+<span id="cb24-4"><a href="#cb24-4"></a>borough_counts <span class="ot">&lt;-</span> nyc_taxi <span class="sc">|&gt;</span> </span>
+<span id="cb24-5"><a href="#cb24-5"></a>  <span class="fu">left_join</span>(pickup) <span class="sc">|&gt;</span></span>
+<span id="cb24-6"><a href="#cb24-6"></a>  <span class="fu">left_join</span>(dropoff) <span class="sc">|&gt;</span></span>
+<span id="cb24-7"><a href="#cb24-7"></a>  <span class="fu">count</span>(pickup_borough, dropoff_borough) <span class="sc">|&gt;</span></span>
+<span id="cb24-8"><a href="#cb24-8"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(n)) <span class="sc">|&gt;</span></span>
+<span id="cb24-9"><a href="#cb24-9"></a>  <span class="fu">collect</span>()</span>
+<span id="cb24-10"><a href="#cb24-10"></a><span class="fu">toc</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
-<pre><code>133.705 sec elapsed</code></pre>
+<pre><code>1171.556 sec elapsed</code></pre>
 </div>
 </div>
 <p><br></p>
@@ -584,7 +749,7 @@ <h2>Join and cross-tabulate</h2>
 <section id="the-results" class="slide level2">
 <h2>The results</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1"></a>borough_counts</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb26"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1"></a>borough_counts</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 50 × 3
    pickup_borough dropoff_borough         n
@@ -603,12 +768,19 @@ <h2>The results</h2>
 </div>
 </div>
 </section>
-<section id="your-turn" class="slide level2">
+<section id="your-turn-1" class="slide level2">
 <h2>Your Turn</h2>
 <ol type="1">
 <li>How many taxi pickups were recorded in 2019 from the three major airports covered by the NYC Taxis data set (JFK, LaGuardia, Newark)?</li>
 </ol>
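-<p>➡️ <a href="4_data_manipulation_2-exercises.html">Data Manipulation Part I Exercises Page</a></p>
+<p>➡️ <a href="4_data_manipulation_2-exercises.html">Data Manipulation Part II Exercises Page</a></p>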
+</section>
+<section id="summary-1" class="slide level2">
+<h2>Summary</h2>
+<ul>
+<li>You can join arrow tables and datasets to R data frames and to other arrow tables</li>
+<li>The arrow data type of join keys must always match</li>
+</ul>
 </section></section>
 <section>
 <section id="window-functions" class="title-slide slide level1 center">
@@ -624,14 +796,14 @@ <h2>What are window functions?</h2>
 <section id="grouped-summaries" class="slide level2">
 <h2>Grouped summaries</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb19"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1"></a>fare_by_year <span class="ot">&lt;-</span> nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb19-2"><a href="#cb19-2"></a>  <span class="fu">filter</span>(year <span class="sc">%in%</span> <span class="dv">2021</span><span class="sc">:</span><span class="dv">2022</span>) <span class="sc">|&gt;</span></span>
-<span id="cb19-3"><a href="#cb19-3"></a>  <span class="fu">select</span>(year, fare_amount)</span>
-<span id="cb19-4"><a href="#cb19-4"></a></span>
-<span id="cb19-5"><a href="#cb19-5"></a>fare_by_year <span class="sc">|&gt;</span></span>
-<span id="cb19-6"><a href="#cb19-6"></a>  <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
-<span id="cb19-7"><a href="#cb19-7"></a>  <span class="fu">summarise</span>(<span class="at">mean_fare =</span> <span class="fu">mean</span>(fare_amount)) <span class="sc">|&gt;</span> </span>
-<span id="cb19-8"><a href="#cb19-8"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb28"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1"></a>fare_by_year <span class="ot">&lt;-</span> nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb28-2"><a href="#cb28-2"></a>  <span class="fu">filter</span>(year <span class="sc">%in%</span> <span class="dv">2021</span><span class="sc">:</span><span class="dv">2022</span>) <span class="sc">|&gt;</span></span>
+<span id="cb28-3"><a href="#cb28-3"></a>  <span class="fu">select</span>(year, fare_amount)</span>
+<span id="cb28-4"><a href="#cb28-4"></a></span>
+<span id="cb28-5"><a href="#cb28-5"></a>fare_by_year <span class="sc">|&gt;</span></span>
+<span id="cb28-6"><a href="#cb28-6"></a>  <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
+<span id="cb28-7"><a href="#cb28-7"></a>  <span class="fu">summarise</span>(<span class="at">mean_fare =</span> <span class="fu">mean</span>(fare_amount)) <span class="sc">|&gt;</span> </span>
+<span id="cb28-8"><a href="#cb28-8"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 1 × 2
    year mean_fare
@@ -643,10 +815,10 @@ <h2>Grouped summaries</h2>
 <section id="window-functions-1" class="slide level2">
 <h2>Window functions</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1"></a>fare_by_year <span class="sc">|&gt;</span></span>
-<span id="cb21-2"><a href="#cb21-2"></a>  <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
-<span id="cb21-3"><a href="#cb21-3"></a>  <span class="fu">mutate</span>(<span class="at">mean_fare =</span> <span class="fu">mean</span>(fare_amount)) <span class="sc">|&gt;</span> </span>
-<span id="cb21-4"><a href="#cb21-4"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb30"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1"></a>fare_by_year <span class="sc">|&gt;</span></span>
+<span id="cb30-2"><a href="#cb30-2"></a>  <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
+<span id="cb30-3"><a href="#cb30-3"></a>  <span class="fu">mutate</span>(<span class="at">mean_fare =</span> <span class="fu">mean</span>(fare_amount)) <span class="sc">|&gt;</span> </span>
+<span id="cb30-4"><a href="#cb30-4"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-error">
 <pre><code>Error: window functions not currently supported in Arrow
 Call collect() first to pull data into R.</code></pre>
@@ -656,15 +828,15 @@ <h2>Window functions</h2>
 <section id="window-functions---via-joins" class="slide level2">
 <h2>Window functions - via joins</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb23"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1"></a>fare_by_year <span class="sc">|&gt;</span></span>
-<span id="cb23-2"><a href="#cb23-2"></a>  <span class="fu">left_join</span>(</span>
-<span id="cb23-3"><a href="#cb23-3"></a>    nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb23-4"><a href="#cb23-4"></a>      <span class="fu">filter</span>(year <span class="sc">%in%</span> <span class="dv">2021</span><span class="sc">:</span><span class="dv">2022</span>) <span class="sc">|&gt;</span></span>
-<span id="cb23-5"><a href="#cb23-5"></a>      <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
-<span id="cb23-6"><a href="#cb23-6"></a>      <span class="fu">summarise</span>(<span class="at">mean_fare =</span> <span class="fu">mean</span>(fare_amount))</span>
-<span id="cb23-7"><a href="#cb23-7"></a>  ) <span class="sc">|&gt;</span> </span>
-<span id="cb23-8"><a href="#cb23-8"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(fare_amount)) <span class="sc">|&gt;</span></span>
-<span id="cb23-9"><a href="#cb23-9"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb32"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1"></a>fare_by_year <span class="sc">|&gt;</span></span>
+<span id="cb32-2"><a href="#cb32-2"></a>  <span class="fu">left_join</span>(</span>
+<span id="cb32-3"><a href="#cb32-3"></a>    nyc_taxi <span class="sc">|&gt;</span></span>
+<span id="cb32-4"><a href="#cb32-4"></a>      <span class="fu">filter</span>(year <span class="sc">%in%</span> <span class="dv">2021</span><span class="sc">:</span><span class="dv">2022</span>) <span class="sc">|&gt;</span></span>
+<span id="cb32-5"><a href="#cb32-5"></a>      <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
+<span id="cb32-6"><a href="#cb32-6"></a>      <span class="fu">summarise</span>(<span class="at">mean_fare =</span> <span class="fu">mean</span>(fare_amount))</span>
+<span id="cb32-7"><a href="#cb32-7"></a>  ) <span class="sc">|&gt;</span> </span>
+<span id="cb32-8"><a href="#cb32-8"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(fare_amount)) <span class="sc">|&gt;</span></span>
+<span id="cb32-9"><a href="#cb32-9"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 30,902,618 × 3
     year fare_amount mean_fare
@@ -686,13 +858,13 @@ <h2>Window functions - via joins</h2>
 <section id="window-functions---via-duckdb" class="slide level2">
 <h2>Window functions - via duckdb</h2>
 <div class="cell">
-<div class="sourceCode cell-code" id="cb25"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1"></a>fare_by_year <span class="sc">|&gt;</span></span>
-<span id="cb25-2"><a href="#cb25-2"></a>  <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
-<span id="cb25-3"><a href="#cb25-3"></a>  <span class="fu">to_duckdb</span>() <span class="sc">|&gt;</span></span>
-<span id="cb25-4"><a href="#cb25-4"></a>  <span class="fu">mutate</span>(<span class="at">mean_fare =</span> <span class="fu">mean</span>(fare_amount)) <span class="sc">|&gt;</span> </span>
-<span id="cb25-5"><a href="#cb25-5"></a>  <span class="fu">to_arrow</span>() <span class="sc">|&gt;</span></span>
-<span id="cb25-6"><a href="#cb25-6"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(fare_amount)) <span class="sc">|&gt;</span></span>
-<span id="cb25-7"><a href="#cb25-7"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode cell-code" id="cb34"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb34-1"><a href="#cb34-1"></a>fare_by_year <span class="sc">|&gt;</span></span>
+<span id="cb34-2"><a href="#cb34-2"></a>  <span class="fu">group_by</span>(year) <span class="sc">|&gt;</span></span>
+<span id="cb34-3"><a href="#cb34-3"></a>  <span class="fu">to_duckdb</span>() <span class="sc">|&gt;</span></span>
+<span id="cb34-4"><a href="#cb34-4"></a>  <span class="fu">mutate</span>(<span class="at">mean_fare =</span> <span class="fu">mean</span>(fare_amount)) <span class="sc">|&gt;</span> </span>
+<span id="cb34-5"><a href="#cb34-5"></a>  <span class="fu">to_arrow</span>() <span class="sc">|&gt;</span></span>
+<span id="cb34-6"><a href="#cb34-6"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(fare_amount)) <span class="sc">|&gt;</span></span>
+<span id="cb34-7"><a href="#cb34-7"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code># A tibble: 30,902,618 × 3
     year fare_amount mean_fare
@@ -711,57 +883,17 @@ <h2>Window functions - via duckdb</h2>
 </div>
 </div>
 </section>
-<section id="your-turn-1" class="slide level2">
+<section id="your-turn-2" class="slide level2">
 <h2>Your Turn</h2>
 <ol type="1">
 <li>How many trips in September 2019 had a longer than average distance for that month?</li>
 </ol>
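-<p>➡️ <a href="4_data_manipulation_2-exercises.html">Data Manipulation Part I Exercises Page</a></p>
+<p>➡️ <a href="4_data_manipulation_2-exercises.html">Data Manipulation Part II Exercises Page</a></p>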
 </section>
-<section id="custom-functions" class="slide level2">
-<h2>Custom functions</h2>
-<ul>
-<li>Not officially supported</li>
-<li>Works for simple operations but not with bindings</li>
-</ul>
-</section>
-<section id="custom-functions---supported" class="slide level2">
-<h2>Custom functions - supported</h2>
-<div class="cell">
-<div class="sourceCode cell-code" id="cb27"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1"></a>millions <span class="ot">&lt;-</span> <span class="cf">function</span>(x) x <span class="sc">/</span> <span class="dv">10</span><span class="sc">^</span><span class="dv">6</span></span>
-<span id="cb27-2"><a href="#cb27-2"></a></span>
-<span id="cb27-3"><a href="#cb27-3"></a>nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb27-4"><a href="#cb27-4"></a>  <span class="fu">group_by</span>(vendor_name) <span class="sc">|&gt;</span></span>
-<span id="cb27-5"><a href="#cb27-5"></a>  <span class="fu">summarise</span>(<span class="at">trips =</span> <span class="fu">n</span>()) <span class="sc">|&gt;</span></span>
-<span id="cb27-6"><a href="#cb27-6"></a>  <span class="fu">mutate</span>(</span>
-<span id="cb27-7"><a href="#cb27-7"></a>    <span class="at">trips_mil =</span> <span class="fu">millions</span>(trips)</span>
-<span id="cb27-8"><a href="#cb27-8"></a>  ) <span class="sc">|&gt;</span></span>
-<span id="cb27-9"><a href="#cb27-9"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
-<div class="cell-output cell-output-stdout">
-<pre><code># A tibble: 3 × 3
-  vendor_name     trips trips_mil
-  &lt;chr&gt;           &lt;int&gt;     &lt;dbl&gt;
-1 CMT         530173884    530.  
-2 VTS         617481207    617.  
-3 &lt;NA&gt;          2697575      2.70</code></pre>
-</div>
-</div>
-</section>
-<section id="custom-functions---not-supported" class="slide level2">
-<h2>Custom functions - not supported</h2>
-<div class="cell">
-<div class="sourceCode cell-code" id="cb29"><pre class="sourceCode numberSource r number-lines code-with-copy"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1"></a>morning <span class="ot">&lt;-</span> <span class="cf">function</span>(x) <span class="fu">ifelse</span>(lubridate<span class="sc">::</span><span class="fu">am</span>(x), <span class="st">"morning"</span>, <span class="st">"afternoon"</span>)</span>
-<span id="cb29-2"><a href="#cb29-2"></a>nyc_taxi <span class="sc">|&gt;</span></span>
-<span id="cb29-3"><a href="#cb29-3"></a>  <span class="fu">group_by</span>(<span class="fu">morning</span>(pickup_datetime)) <span class="sc">|&gt;</span></span>
-<span id="cb29-4"><a href="#cb29-4"></a>  <span class="fu">count</span>() <span class="sc">|&gt;</span></span>
-<span id="cb29-5"><a href="#cb29-5"></a>  <span class="fu">collect</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
-<div class="cell-output cell-output-error">
-<pre><code>Error: Expression morning(pickup_datetime) not supported in Arrow
-Call collect() first to pull data into R.</code></pre>
-</div>
-</div>
+<section id="summary-2" class="slide level2">
+<h2>Summary</h2>
 <ul>
-<li>recommendation: write code as dplyr expressions instead of functions, or look up docs on user-defined functions for datasets (see <code>?register_scalar_function</code>)</li>
+<li>Window functions in arrow can be achieved via joins or by passing data to and from duckdb</li>
 </ul>
 
 
diff --git a/materials/images/segfault.png b/materials/images/segfault.png
new file mode 100644
index 0000000000000000000000000000000000000000..f6a1fb09b9aa4cc14d5a3b389304595f89f3da43
GIT binary patch
literal 16866
zcmeIabx<7Nzb#5a6bTFxJcL1lyK52#w-DSRcyM<}2r>i<!993zcbVW0!F}+-AvgoW
z@H*e~-nqAar_OozRh@VLx?NCIHG5BY@6YbfUZ1tsny`1uGPqdeSQr=>xN@?P_ZS%W
z=`k?wxjeZKoU!LT90dMgy1bQBfAZwX!m7#&2F6PaImjDz&$NRjKW%k$w8Wp$G$GE=
zS9pD$5u6>Q+dqCJ{Y;8G)o2ULH?dgFXR~iI+5BjiXJ_HB=WI7o+uAYzb?Jqfk2Vua
zC9iGwvUc{jst@0z0`J`){)3Mr==kUv4@|=1-J93Ef;opbZeCvPLw+Mj%10wUhc}B`
z5hZ`DXh3rZ?#)Ze5dPF*`+5;Hi3sFA#?_H=(T$LHUJL5@V~!jdr~0+GJ%l@1QJVX4
zZ#_;YC(im+Lf%M)Q`b;%OXZy1f!zUn85PEB=7WO+J3Biu@u)8*68-GPly_WFubpzA
z`Ow%33ky?vO_=OeEbLQJQ3bwiX!69s2-7ard{c38BKR!i);UXZeS4yB)0vDmmd9CU
zpdv9uDYP`FGOIAZEUR{E9zM3ZJ8%ypcfu7uJ~=4~=jG?)<FAu78pZt@&w~}5S{_|r
zU;lWp%DHiIWPX$GRj+4Cj@|^EJ)@YniKRj0utAB8XnSYJ#KUa~1H%zB22@*9lk)Ln
zG!+ZFUz-~LS95b?<J?l8au<Qko@?1j!8@f{J2i%RJARqy$6lBi7`Lxz*93zZY_Fgo
zlh9|>Txp`jL>pU45z^ewQ3X>|0!=KU`CZ+HN!+hT7=R$(uBF8>#BxE@);<&$N8&~f
z`?rZl2|XA=E7|#VmWWt>Gi9h5wve30!1$$6MHsU*(r3mG7V1<?=XRPQG4k?pj);gE
z+$>qLub;CkEuGH#ENK^|GnPKH27I<Y8wJR;eqrDJ?)BY;yPl6vb6WhMl#nFEy}7m1
z!o^bO?KtqU90)<C^^q!fvfBE34=5FXY9z4;q|$&6+?>NQY5r6z?kNU_0OMRY2Zx!w
zf<m|=11%XBJ2NnI(akR%Ffd+Y^-*A8e7k@02m>ScQ_DRJ45t5c`oCvPN{X>DFg~Sf
zPIxI`PcYPkr%inY#`jbEnuytxE&J1pi;J^AU7q9^7_#^N&b;@p;XreuZvsHFc({T4
z@27wHT47+2vPLQ?u}(}gYYX9F|Bcn7@Wjy2j^W|K!G(!V%ysL3>GZFkg;STXC?Ns;
z{}SIvsap{eEz$q0UG{c?+_#c%Zf#eqyC7^k$?)~>#3E%Cl?ZaF_ww=b@$~*63OrTr
zC-g9GNo#9sR#v+$e|Yxx<|Y|F{>thqQ*6I~zp#|5nGQ-5<JYgwm`(4q(@_b%(&<m1
zFa!kZUr_M!@x>=3XsWAgvnF8UapMq@l9GZz;<nlq&CNHpc1=x9%yba8#KCeqCyY<<
zmky7lWMo3SKUmw_+XLy!!Qp6o+nBUxcvvax-PP5Vr<d1x!ejU5+x@xf-e_tEJG=Aq
zO_}HyJib?d6htVWhi?ug^O&2Pv$3&dWMs6rw~LFzS@#&e9^m3e;^N{y#zWwetr3E_
z-sg-tQhL<Jrz++HH>#piZnNx#@hJniyrPVZjH#)qhDQ9Dm94EUkyK@U0+6?Qdqo%-
z83hIDUcOv;djZY%ygu6@e5EXyU}wuqE$sTUrNwuDwi1m-1NXMr=+fQ&ikxEj&6_us
zm6h^&ReJWdb#<CrTA4yF*Rz$Td3p3XGSRBVQ(99q);2cf#l>pHVx)-O(Svj)6{w!x
zOIN?&;aVY9J-rMJVl|?h{N{I>ba6++vH<}F=<UK2au%;+`GENMRL_Mhd>1FXygYjU
zXc^4CW>U!HIc~(f4|1B|e}Qz?NE&3OF&Q1-meR=|S+1&(X%nr|nYq1{I{XTKxS$9I
z6)VoEUSH!BhEqCjmeVWx@*BMbvmdHFzAPo*+ztG(jI}!gc6jvK#%6D>+H!EfpEou2
zdZH~SC#Q03an?%Q?>eHOVC&E7E{N9t`0R|A6z7#nM$MdUB7a#~nZ0#o1f|dbS4jRl
zMh2x6qUX=2*WaavhRS$)0%@9dB)#}*HI(|~^i)AXp*NC(1RwwRC=ItNQKYn~qhnv~
zhEg=E#l%i_+Jux4wZovd)8#AJgkj;R`H0s=nRz*JGKc8PC!6Q_ClgW+8|RX#<5?_W
z8BilVf(!oy)20nJCFEf`l4F-0n^KEWF*T{8|0Mw^@l1L1@ud+S-{)}icoMUjv|rSR
z9OLuz-X|OV)s~}mwY3Hd<hr`LI3W{xd6r}$)(#H!3kQky)<B4boP(e!ly6Q>Z%9zk
znHbY2lcWmH;R=&>%aJr87sL?y_C``tGABDb1c%JId+tG$(3ADv1xlZ-)m10E8!5`s
zQ_3(47eC7Y8Qz_{7XCdn6_cO4b5N^IruXUhid8{)c!|ZE?b_xmqy83;Ekl<?nIaDh
zs@b2ZVP8b_J-+&jCmMJ~Eb0t}wn=9*CL|>IpHFEOD5p0eDjHpZ_ry*tB=XCWy(Otd
z7gVo|l%J%bxmiX^YR9$7q%$O4#KZmw-WnVn47?xXrD+lY=*dF0t<g-_A7h!&+4=d8
z{Hevo#R~HBO>TA9(aopj-Sm0!=5Do8)cFxRqEAxr_L}kP-DtNZZdw*WZ0M#b5tYg5
zX^n)bscFsh-$$N)OW(Wnh-NfLF~&*;ib_gKfM4On&4G5_Q~0RME|rmcMlPX}Q1hAJ
zo#Q?IwY|)*R7Niy%2I$8RGW3#|HjVW-`~cj*lgiodz+Diqr9-Nu_zCPLgg!^@|(-2
zB_?j|OcY%1&$*m!$K>ZT$o%s3@d?jv@Hm1?hCZ8`nOU&qm!aO<9;03MJ&n{YMF&?G
zhe4a^tu;=9xp<4`Y!9j%8(Ze*KBcCiu5cn<-X!3qeoHzT+RNN%8~5W>;wABGv6gb~
z=z@Ox^$WudV%w4ZYT`1%P2>cNSZJr1guW}dSmU0!|Gm#C+hI*Y;aX)%%X;pWG2P!<
z&Aa`0KG04A|Ac?u{j=Q=bdw;Se1X^6KbOrarc<gJ`g4k=Gj?LIInIRHQ^Ia8@XE@P
zY^;7%FXh6&tx%!9gw;>^Q*?BFbhO7x+L^P0&PYO@i)kk&Cgvp9)4{rpr0a!Ho~``>
zrwe;~IUS+pITPN4{%JjLBUz%MWpmNmm*Q`!@PCFWYsV#=cbXr&;`-3UnLB95;S&;U
zlYFZ{Mw)+gpiVdCjD!a5=Zz$Y7fI=meB4`BkcQ-A?yw-brGMVPPn%7ckZ+zTl>z_8
z%zA!l!{CR6sThkM?9Lus%+JqvhT>&P1Q=eI&P+|Ei}_4XOic9l7ThWHn{oI4nU9u0
zTXB+(q%i>pm9VSTyLZ2TXZZt(cWP>in3#BTa}!8^d3o_re{|!Uy!Ypps8XX@>@T8N
zc>&8*L{WRbhKzmQZliI%N0TRhsP8GK0vrZ$bu+H3_ug<4?Q;|D&t;r1c(~gtyS@lx
zsbfp61mApMA=n+nIX4e^duuotH<?Tx1?|c{+i1=ZaqPFItu9=&Wxk`4c$Mi28)Ld*
z%rVSNsB%61tT4#P!W~b%X<_bPgE$yE7rq;OZY^5Jgxnk&8GdScT5YJUuU@~tka+Ec
zl`n2-S?1DS(ToriA5K9Y^T*1n-)U%;ee?8)X2Dl9l8FBU=l^mFHPEcEQG1iohFa&)
zQrWc)!1Wn3tYf1$IepO!)?ap@PrF8b2PKQ^alNXoHdfGS?@fJ9-KWw@7~`_u=t%Si
zuKzG~SW`p8ZTokYI3feLuBxhvMl!^zcD}fkd17hFpm-`IG*nA7<L0=x3Wx--R@&P|
zNUsN%+t}E^(B4PCvw-=3etCJh<%K4<#Mk~><?&nyk6@sytI&^H*~`Eer~Z}E%WUsk
z#K!D!3j>3@^J3}xdOkga@?L^ieo1_~40}R!a;u|7L^*PM<iH2VVod)GF>b#480HC)
zgh(Cbigfl!Fzleaj_>K)m^Q9nnfPU_cF?@<8<rjAAp}(p^j~sJ2H(6d9&>Gy4R{Gt
zkjQ_WKNL@bH=%Ep#y(J=rET<%20v!!SH3w*=QIC?+yyz7RZmR@&{p}=(~q3$f*q2a
zEvXsjL1jm$dJr@CX(PPyL9tvKRc|kr@SU9wL+`hF!_y@>v7u$cVIc>5T0<VMkEHQb
zEP^%+;G)sxv#$K)>vOLC+QV)Sveaf2Q&87pJY5pbwf>i!*PFSyg0(r5$K*oicl0^e
zCAGyIxwHk8goI=-(}asgJ@2q2C>x&PZiV>kUo7=b-MxXICDPhHdm-vMT)_J%Xvto}
z1rbqSuce)YAb)PDqnSgNH5O5!TMFO^-~bQ?#67m(j9<XO`(pvHCT3-gSyWe+qymVZ
zdMm-3Zll%rXsBM6NYOHUfXb3w(C$QG&qw&VJLn?yoJGQ*qOs!CuB5SqvA%>+fW>rB
z_b{O=o}#V<rnl}}^ruv*Iqj(A)t$*@;%Jv61QI>!%ZqNi9E1*Cm+o9Iw`8veonTp-
zb%|b&JRY^;urGoCkSKP1@S`z%h?M|X{xguAwC8J|H8_chNj3aXSwwB|hrVA_P!W+g
zb>-vEkU$#0)rc+UVTjp$ELwB*`#E6~M}gm;PAa|F4()369GQy|4?ale*%k>FQ}s=h
z*~G~HNa6i<o>&H#t{ueiO0_sQVT{+&mbc6MQHc$`1mB9A^IUvzC992fN7bkzgbr1~
zw!X(#K8h%F;()C6(A`;|UI)i&s2jK{LMJ2TvO8+R9`h$lf!Eev_!SJIYS3+F>geOT
z5i}#o=~Wbmz_uqi%W=X_&icCSly2o(K$2MJy&KdKou9ey?xUonWN4US)7g`i_xGsH
z$yrY<fn>e=sa`aeBh%Z{6GbT`Pb9*wM3i8@zP5%KPW6~EaA8nP`tiQ7>^chbJS0#O
z`Ec1SL`iln(|xPW4~7`PK506iyMdz(vM;K}C{}Gp5P8@c18&kWCNQ{A^(WkL^@P34
zjg9^O{yCS~-E!9#t3~J6)KI7zlSb6zuW0mam0Kz|d~^gkVqArfv~i0~qd%6<Go@4M
zQ!u))(!G|bs&YBSJ-_cXkYvr}=PJa`Bq`(|RdlLOT%hFVo8|IEnRVt!z}0Ao#c2v#
z&V&_o+?3nJX_=v;bgo4n$L$yI_oKR5kd`Zo7OX5FGDgOAF0^oY;VFt>;4p|MTKlOM
zi|QgMt>#xlw`bcdWAj^VQUoZZ=dTJy!RVC*5LibH*SFu3Pft(MMRxKOZd*qx4edLM
zCY$v=j*0b)^=|pX-lzs0*PtsF4uIi(jQ0W+9D4Pc00eqLPA>eZK26AdUmJjG#>U3#
z>XXOoQUIX7ckiBwi3z3`ZEaB+N#<1!|4n??;OIU5sAxHVYnhX~WZZ|>sH10lr_N`O
zaTt16WjM{+8vSk~=tW~Ht4?vMt@`vE8Bb5np_`i;5JIaHOs3dnC6)^O<dbT{;ZbMN
zK_=6D4*m0v>kTeSkP}Y=D_MNHzwXN{#XB^^?M-?l16>}<<Thn41ujfQGHA><4-GGE
zfcD8I0MPd7X1p}mF0s4D$%(|DsHL1ECI_40$$eF$Q1ywj5xILM+{f%&{V)7`SgA7O
z?HIt-{#sE;^v0lO*6)a>TnxKh%{Nii*FEI1j>A4q;r7!F#Yvx+juJ~&*ZS<C{3mN8
zj=FYqGDyKt7t<K7Dyda9=_x$PJH){J!oumvGu_f#VG7~dj?PX15w^BG+V|(N9L<RD
zH}hC@l6m{~t)ru(vGL1JPWikO62qhZrCVk5Q{?=2KfObb7lpy=WGtGNM%~&^dTk2m
z^F(Wp52$2RC|gL?1KW+a?{;`pUxLZ&5fcXG<xL_C`{UH9#?14vZTskgG@jNL*MI?d
z<z{^tmwcH*PDjZw6Skq~Bur8thM97TXi%WI^msEdf*qS7bvhES3W{0%`{OjaD-s|!
z+(?oyZ&Zvv3WgPxTNBMlLsAxlHX`&%gdcgiJmgP3*yxW1qF%-x1Z4z+fpX_F4h0ws
z3+roiG~QDu_`8>kvi+P9FxcIvA;tALr$Rj=m4lWY=uWjn+w~H06vJjPGEoH^Aw8V-
ze2NScEX0siJeltIO!=~4?R0oJ@vhj%r>**_gmr(WxTt4#Rax%#3W?4UYA0p{X+fN6
z>Gn)T70P~IF4D$USS;%1x|~Se)@BlRIwBNkV(ISh@X&}=l9eZ|slnrIf1%n4)oRCN
zQQKt`jM?2{%V0Lhm_J!jV<Z59jhA|9almCzms`7kMxSF6qDEpN|1&Udk?%-qv_AGg
z17^f6F%_(&L8ffLZCO#08)`k_DEY1b(QXF4KzV+Ce(WoirY5ZHk&NL_%W@M73pe9s
z5%PKO8Hw83+Aw(tQ=l!b#s14)?r(U_+MI&fmB&(DvP}^svlVOw6(y$`_boatcXVV#
zwwSGX$rGTmA_iNc2Q(X&ZDR+lRmRLi^DW(819R#>IerpAq1wt`3K911Cq-@wYiWe#
zA{<YP`bLEg^&eAbr;D;NZ5~fCagB7En);Q?2p!ZkRE)ULh%68A>XTmj_%#=v>UYOk
z&%>X?m^bG{ZnzTYwyR-Htd2o}9^=);lx_*;tqJDaqI!fR)I7g@ocirPL^7=Bjh@}m
z0~n!4NbAFxrR}hX@`^}Co7>E$DwY6F8f+(g*n)+XwdDok3BIa}OLg}Lve}X0&Bet<
zFc|E8Lm6a~lLq^^svty041weuIM|7#FJ9U`561FC3|<c<hG|D#Ztb-6`yO*kv)iK;
zw}Nj86UtLs?Yh|sV~joR-k`>Ey9#5NHLXg7HPnC1Wb(VjPQMb1&!ywGkVyVh#I--o
z#5K<u`F?YdKVx7i<3JQUdpzV60-;Z4d2UN$AC^o&NcOx$9r>-2(XzRkr`@_H#c(h^
zu4#9~j^#oyKUB=iXVKjt&O!3nWGl@Sj2_V_j4eBOHKQ~KEN~6SD(D|q>ACDMY($Ba
zX~F@p#yP$eDflQSFFTVrYOhCGkV)y&?`s9NpZrcg(RdFYj<1vy_!R;sz(The+h=I@
zI}z1xoENz2!;GpbkO~%PsZ2XCILm4<ZC1=F=3Zn$)br%)h9AUbK7FZeKnZ2&E%()C
zYI1UNYU<&mN6)lJb4WlS5GD#KVAwR7=akH`feoSCux!avxwR?R;$z;x%EqqOCmhOG
zSRQpT@o<<sp-|eYABZcZPGJoncUFSDMP`*mNG@r*_e(=ukRC#t%Wlj1vrBtw4$Xhk
zjm6V5U7BuwlL;_yBA3sOrwwCIdP2?dSeJbJAdrqZR9ZDj=DLI4X*-3&BgBY6LA3|E
z7df$FxtOzu?)fsa#235}Pm3QTm08;xce0>hF;&@e=x;D*Irp&9JdqVtkErE~ZTt+!
z7I!lF&V55m2#T5ev26YleAseGJnnQG&>z=dZi_Br<SW*H32s080*63+^-p|hZmPjJ
zg-bs;Gp1RZm(ZPf5)DmX*#V%tNn-u1!3JGYCvFu`?Q601^zI~jqPoc2F1g`6lvZRJ
zIkB$rJ-^#Z(ld(ogOE5x#OvbwIoZuK3wPxWw?EN(tvP=dJf;hVq9#KAQ1&{Z$qyO?
zmr(S@0s&V?(eJ64vXtNr=1WBzb52}2yz&{e3G4AGNo4)Sb%zU=%2lVF<gwQS3O4VL
zRMJGHDe^aj5(Etk)8l~3QREKplV|OQZaQy1y|6Qg6{TfR>~~@9McRC@peu(r9c>?G
zun)o*LJUNrJ$fST>kd!kYzKDa(rM^6j^^jixi#4L+dM7~Jn79tH2YmB%flLdrWSlS
z8XJGTK75#2gPfS5mWz10**LiT`{Ib67QBf9TF)kNOBSE42?h*rG;6iCUQV3HmVXeY
z)ORp@{EX7d!A1ftx${+qdUt!fV$K%WH}GQWnKD@Qs0O$+^jbJa*@bZL8~WJ(o@i=n
zGBTlA14@ifHZ-*0??5G&9AQG*sE-~;%xLEi4^D)iZm%9Z7SvEz*Vi)nhr7<plfn<O
zxk~~l?x)8n{E;QZ!REp0PS|mcVSHj@V^h!z)R|iC1IdrT9?CPXiRy&s3a0Dlo|!<x
zMA&DVU}88*zJ{Nbw4p7v5`VMwTJvjZ-NSHn`z9b}V`b&&_Oz8CXnJo0Aq=qdp)ET)
zrt5A_n5i$4hm`=FT~>A<V`cZHgSR`{x4KwiTSFmS3g5At0%OtmZ-2X1f4uELV}L94
za3jXR(0V+xd^QxEN$Mw+&1m>4vkSO3eVv0iatxnz!_*w;(VX$>lR?Y92N=I_e`)r5
zOZXYR?3wW9|0PgD`Odi!<;{+V@q>i{+%br|<=~z3j#1y>ZTaHTlFoC0@fE;Hpm_^F
z>jA3ZDf+X4&Mz(=F#mOYj2|+6sTsh){CzqH2kOOgLJWJsB#z%a60j^tURhbTrMS5G
zjLRi9#tOj@)n(+L3-H^!d;x&?C}YpkVjbkhF%-mTXkV$Ass@U>s{Qj>q^XW();nW=
za*Rbs;HFJU8R+P`rZmo87pfbM`f>b2mU<IEe~#EC@LoAO8a+)6a23P&bq<VvF+ln_
zQEu%rH?0Vc3=U#{vBq$u1@8K*32CY39&q}%6>nz<uFK!yx;5t)n3}oM*U~c5(+h9e
ziC1L6#>4Xf4mTQQfP|F{!D)my>0@B{tsrU8Xi3cbC`?SJyBcvOqOaTAR$5xCL@!>5
z`(8Qz6VS+S0)8?5W<V12_iqD`LI}?B(<e_FGLCj1Vf+@@7$)x9Jei#hD{F0g&k!3)
z>Vz#U0L%<Yxc+D0vQM6niny0x+d!dDsJT7_DKfa20NgVS{{%5W8WI)}F=@riK6hPV
z(kUI$@K8Vz^AiQtNxspBGqNj;FfTKl5+5I*j11tVUmm;P!+1n_k`Du-=Foh=A1_Zo
z_x$$u7O2vk+Ho-!8{9Y1Tk1KdHXFklD}wAmWO#Yi1MQgN2lO0T`2+>E1MTi%;C$~=
zr3$?W2*`kb6VOt~dZ(DoZ4Pj}Uj$5*7-9i3O(}(!iILIj8~;6wmyx%xDEj8djcGf!
zN)0d*%mLVU9}^QBJDQ(A<+1xF91f3(iCJA;ZIA=P`GDZ4B^Kb<c_7=%`V=81>daGj
z=%aMg-YBY{6!3<|#z<1G5A!wFInt4DXBRz(bS|x*o9mSMw^!@1xUPOwy7IeYY`!SX
z1h``T7+S*{EMQ|n>1Uy}yKh)aqx<3E;n>)i$CBI3_E=6pzuhnqmXx;k?Agx5-=Q!v
znpqxP(Iw;GyYK7U;^*6f$v5_tDfNs&OKUPbO{R0JO%t)poQ8$Y&~)t$r8m#ZL}^Z#
zVxHb$Pli`LYM{n_8K2$^6VkSKY9Z<HoaX??_KS*%IkcWnbD8$2YiT7WCeBPwLO;OZ
zmIIs4Mn=o40DnX+4)Eue6$<e|V5Xe2FE?K8xyva{R&yaqgnEQdK4aofdx{=S4auV7
zRxJ1&4%5?`hhuoRmvL}{-*SEEg5DuB?~(!15_p8aOfgW8xc<xx4Gq<2PwY3t>0~6r
zGBOgUqcc0(9;*k~#3heWB}GL_9*d!20)7ajiy2DK4oURA^mM7rLY+Jcq}iK$NJ#tk
zKJeht7DPHwsNxaZyH^Ey<@(<^Y59fSm37M+^-ODpeb>@zh0Y<6&E{!12}zk|C}dPS
z<v2YVDm!GDpB$^YssD3jZgxR{2n4#VV50fBgJ4tG8i~jxC4^l~`F`#@8yhbW=h?h7
zw|7-M>zWdRKs1D`!`Ene@9p+ob(bSV>XvlI(}J#MyN{B+Z6&jrESJ)(lw=`8X$9iq
zQi-r({F8}92Eo0|dj96@g$cyI%zEac%`^<3nyUHbI&b`uu8)zgYmhL6D!yXB)|UsA
zKB8Us#$m*dUz|bM+d}vAtXe8O8843`6;!Gj4C!K%U0C0DuqX%_^5efPeE77!cVrHg
ziQ4+Gu4Sv!>}i35Ea|pdjXUVL`k2Z75Jp{Ki>X)YrHTtY{Vj6l@F*!Ybpd$y(1RQ)
zAVs8(Pt-AJkfT{!?)ROOs_xEryc3Tu$B>d`4&95ui-lofaMg?7dL99d4)-nL&0zFg
zq9xU$U^4O9D2J|(`)cT84@FI^{B^1OGOogYJMy=29w*|-?G$jmrWFAj4X^T7Ugx@D
zwbGfRM`*M11Ris?>C)i_pXH**@Hyw#FuvVz4*vogFad6Az^!(BgvT?B-K*WStryxC
zX4~!cTc=Uq$t7-Fmo+mWorq+y8m`v6*{`?rGp8`c4cz2twmza>dXHuCf<~Y9M7{Mm
zXSI>j%Q8=IJ^g8d&2CF%uD~;_gg+(8k0rdmy~<A9><;>Wv;Zdq$1KZ!(gI%vs7JLE
zP_y~7IRQ;<=Lc>=s?8)XP@Hqeg=L$DiS;|?TL4?>W92i8;E+F@PQuG&^Es=G_(@fm
z1u>gCV$*B%S$<9*lXRD6)yE{iRPSR-s*rz4(iQO2+uXtW{WmV3^FVW!_{UTajm96X
zq+|tPV)mu9^8Nz0p*X>-Q`7Y2cSi3o@!FoB=}^1$?`d{l5qeq2W@`5x4fm6TL5Yr;
z3JC?lYX$T@$v6c=kU;T09`?;fzSww37gKL{uvd)9jB;m$<@C@y0;l=McMfvDwnXrr
z_z~r<xG~{<6av5Bb)8p}>Q1b^8m&9X_bSeuX3|-EOE@E1GIca2GW`>(*MQ=0yX4PL
zGYF-n_GVC(>#|nMkUd<jMm8knBAw(pz{<%D&T?iZ?Umvpk4<>KyXAO-*Lw5HS|+?3
ze(cZB3&sT`czK?z?;TA$LI$jD-mga=47<5%6LIwq5^^9iKEfRmgDN3r1IT^HjhwyM
zw?b>Yask>ek`vQScc&k%%&!VOdGrmulRkiV_-ev~jO9hYkRb<Mh^rWv34h86h4YKA
z!|rIf4uw=Y54)$tmL*#Sgzm}x7%SeVMGY0~ES%HH>hgI!J=*c_<-FB0CFKp<T3qxf
zdkMnst0sPbkW<-W2zy)LA<nGbNZP)8HE+~?Ih8-A-Wwe3{-RE@Lw&#SP>%#>jsktP
zo3YOD8LF$awf?@l4UHHI9iKsaHMeysvOYbRo}v}o*A18#Op0LG`&<*$keI%ldHF8V
zhVDH<O!*!C#CvVS%@(8gIfYSiH0eIMIN0kE`QG3=g#`?KkqulNL1iK9sf17$f{al`
zc=9|whbQq7RW&NduUN)S=f_r<ZHs(A*<cYA#UY-L^^QDTY2-J4Bm*JStrz%VFUU=B
zfj7+|7cMi~G(-La3tQ)^{tLw`k6ShTvc;ZU9Q>%M)G$wQwzo^CIg3Cm?4ti@-F^R-
zvnTT>v#I^!-Ci+~YUV{Bzqd(m43a!b_U>kTtE^P%m0{k7r8wG7`aSFw?G-botdY61
z8A0;E;KOCXI~KMFzh6~1_IK1gE!7<93_Ru~B{G2-#%q7i$XIZz2%pFsa#*~0EB!eP
zD$7Z{xk2tt^=#d>|1@|=BA+A)I7c<gQUA@|J)8e>O6jp8ByiP;!=YFqP2dZyCIz9t
ze84XVL_H=Rc}~oLcZ0kp9~?|yNUw3Efr;6L!|Cv2jKxbwnOba*GZq*Lk22n$mD@e~
z3>|uSWG~@Ar=4tUO?M|7DFJ#ab>uL5bfehJ_~f}!q-G~bRYMbDn6qvYEHSDPA0Osd
zyA$J%?l^vI=<lSJ{2G$oh<c~|MuU1dOg33CV4x5NOEaMP_AcQzv0hn8sm*M5xc%Gh
zN&bGdU-{#eY#O*BTJ-2>@bMKA>|pHZNuQtpXTfNAUsx(~qP4+volQt(_wD5Fw=g-I
zYGr|os+OGNPzr~BFII6#X;{js?Fdth)liBDn%o0*sWw&g=i$-2U7rA@wJ_P(laBQ>
z>5eY4r!F(;<PvNpmu>_gCG@%T!RI_s$~UNt%L9IkbBViro?LoJ$WK1ir!b$rt4-qb
zacShra$Ur*PQF5th{F!Qk-?(*mxm^JWk&w!=%>d|UAEXp%uLxGe!F=PYPI4cr^-zU
z5krkfNHr6uk^bk4HJgK&bsW$qs=cLb#y<8F)xRg}mknmG5Q|oBP+@NWW25t)sJ0Z5
zi|qrS$W)7v$I^7R+-$x+d*2V8XcIPDjuYDUvG(q5$L#<5j0s4RdIJ~@j<+QNxzRt;
zriBA{Fu1w67!%_Y1vCOG%MdHXejm`-l*5k8LQ?^~#9-$H=I#Ganipe7b6`!pl^LZb
zWy?#8Uv)8J0g&+*l_8bza3vN>6mD|$;RNeI6%IONg(0xz^Jn9J-I9}w<DCl*<Kx~|
zf8Cm{o@JQ0n{H-i2$RsiA*HfEb2D1uk!j~=Jk@1??iIWz#{8|Zoau=DTf(rPTAkr3
zSXBn?5}Sw>zy{}zgbGIG?3WuDP=G*81#EK8Zn&_x*-V(0|H*AX!Q_2M+~2Uz_d1+`
z5*7=5vO;O(lCDAqlh?y92A1_vU+(7Y)ApjY8mVJY3Wl2*YEA2uBf!!CeH;tyBu)Xd
z;^$TO5g8OJ%joUUy*TSw%S`7z601}25Da$l_m=k!Vt|xBjm~J)bf5vNjbLLlNC*Ao
zqXAXA@RJa@k*V<P)U!5;anrq{^7k2FXf^$*G8Dkh@86wJ)9pfFA062y=9ZeJ*Sl})
z{dV#z<!306CLA+=zB&guSfS{UQNQX4=zvFQD~Gu7TdM3RdF_i77muLz1<j!dO`Erq
zpt1V$p+oK>Phr!!N3BAab5}{*+sW1s!7;{t-tO(dmh5XK*Nz-Asi>?XYJPeM+Y;(z
zys>FEjHQ|duJ^;RhUxV3>DVrY8)`gaJwIC@5&nXVfD{B<Tj%bL(kE5nX{~>qo|e$&
z{H7yfc6(OHx-lwy=sFSD(+w)tpaC8KWlh9LCf18-Q=F%V!={5ET|-{G>uPitms*&=
z0FBR=;gq{d!_&3)fq{-l-P>4>%_k5#a{jyP%0Mm(yoAa4eS`7=-HQ!N145a;xij0C
z!=vtaa}9bg>XPIlF!;2r_ql_e657(qd=rm)h+V~pu2q(<)5nHqwiho!T_b_`=w|S3
zWZHJKR^|d>9X+h5s${pzk|OqPQ_1WZ+H_F-Os~4Wrp|1c-2bL3ej$zPCHYy%arr(#
ztD#ByHLR^OT}xl_+JC*QL{US#z<ycf2`tQlN_5Yu7}b?BN@&0*<gk;aFZA&m-PCNn
zu#4GzNr>6V{5Ojx4;eWfjm^R@4ichKFxTLm_sfO_>y|C972925t$VMk1JTjp<MM6}
zTUt}kkF?>((^VvAQGM0Z8{4fJNJzk0AB7wi*yd2VbErOAJ;C4G(rdo_Xvxg5Y8|4{
zzr1#mxt!)Qw~}}M34Z<i@r`rz3tzd|^ZmhxU{Fl&Px|tGwldxXf20WeUZrI<TOzTf
z7)&_-l3K6Qs2Laqy1V$K7FMy7M`Z*jj~=DIZQf3f<D#q_$VCc|yi-?(+|AW3@j9GO
z2-7R*Tdk#p)?SNUAId1y9Zw!-J7a^&!^pY`lReNpGN*s8|912pqP0OJyIf>2D33hN
zSL;<yeYh64{-Hw<EF`lPDb;Om5!3LSwDe>=KE+vFgc7-Zo%=3IP-&bxK?Y&IC5ZXT
z7A`3#p8%brQ&uDUs^Z4RU@`x|^o=1|GC)bSOwPUa{~X4Af|p5MP9X^);-}RS`1%U$
zufIDWQW}aq0EL?NYXke8PrFX$oyeoSUu{>5Bfig+em#D5GR`1T`E5BhmV*Gq)Iy*}
zJ#MJ2392`nvc;7_ZTPs6SG%tsxymP8k)67psP(cmm1}@9zYBk+1^erMNb7eTP=ug1
zkO18u1)YH3c~%9eqVW=mCr*n^2j<)aPqHit;vs4Ln_*^l)XkW`3DaPo-)!w2?V$#~
ztP9KH%W;wq?wzx^dT*JiS2%M8#&55Ik`)%O?7ilVG8DD75EMQtyG@aWu@MZh=d;rf
z376gYL9F^!!5#iL11xSF4%g>N=FcM!+s`TPzLD~;9@l63c}-xu2&B^=48@&ul#Llg
zq)Dq$F-Z!;v_gWNq}9Y?BtU~m!|hKMX)oQwCG6!<SHwQp1Cq}Ym9(oIQ<&CcQO@0~
zCrLg%E)WPpa>wSb#lYwTLqd>u&Mt^2h5<Pmz_R?UP4Uty=PoW-#nu;n&#pYi^nXA;
zDO~{aRo{mvYUz)U8+Cg;FKoCX>UDaHhpFF^dFqt_eemWbIBWa1mIG}NP&6XX%9o-`
z!tY`9Gf}JH-|!uJ`|UsYPT~I9Q!h;RUk`tFynT*7Sn26+y}N%>pJzW8(<H@7+|qUi
zFIXNw&?{4C&KFI)>Hp$86EdGm0+;jt95kV$B38l6R&;iN^d0&_A>rvrJ4IrS@<09x
z>q?(3*|Xg*#(C)%A0{gYF=OT+@m)vVXz)k~vG3F0^520ECXanf<Ze)R3(~!`1@;?Z
zvXcd3Rgs3XPG)T(wBvW;*FwGASz*OI1X{80GE(aVH~TBBjq*dz>fHYwAD$N{*<4Cq
ze@%=iq~7x94tZ6{s8oM-!eiOmBFtd^$*}~k*hL7MN~L(z_`U&YEQ+gdGTsr4x~O6E
zj(rUYMAnEGRO~s8zK^Re=%d4TY8lVmf<ia9#J+ic+&kilP7Q;y=4XptJ7=F@&Rthq
zfY(s*ZIm*sW7d8b9_E3vUB&8#ko~-LEb}!KQvn+7>>m93t<HrJT$qnfsB-X=-jV&7
zl8MCVl_D`oZ|vQMpubO#p_F1;2vnBnL3p`z@hbEuGmA@k3{amxY(5L|{tQ)nbah=t
zIm4$<j26`b0NZw)V^bV5a>1~VkrM=ZnxCDvc&PULQAnQZin@u3C@2}vc2In2UXlyQ
zECXH51w^-lT(KSce-1C!M-JoOo%!o}m+f&>2JTUxJktLhsdGCD+rhDZ8bk9@NxkC;
zE)1Wo{XCm=7u`!aI?}%}jdE*DfM(v+<7r-7h>5-_eHNDFdlcS0_#h_e>*&N`<_|#a
zv!2PTc(->wN14tsk3L(pQi2MDO=9Uc&tI*v8&saO!&?2yohF9<{DKB4<p(=B7>tiL
z(C{;`M-2@K8gPQF^A$C9=9ngWfPL4*hTY)4a(}8+PRkL$(M+sx7sG8+(F$uP-v(VO
zg4=I~H{(4UWqND*Y(_6C{N6yI!uEEb`*sW$D~}V>Ub)eHbnENNt<@=LzJ)yM%k7;R
z<rU>)^FM5P80}a3m12ENl56s`ILG&ogI*7Ub%K5<z1}sooW0CtyPQ!(jAc}iajIij
z%6;Y6=Zm_Fb&0q#&$FzksAs9e7Jw$sT?7=fG&OY(U{E2_5IR5x!A;i532>+90`POk
zYA@jM^!^d>UkGIXRqOg+%W40cTXSB?kJ11-YU4lF_y0<FIj=Te{EMRcQ~L)thehN8
z@;Mm%!9X8-5%M)a_zod3<t4$7b-=GRb6@MTi)nt;AHR*Lm7gZ?va*Nv{DGCo<={gX
zg0OD_obQ14nctC^-x1PuH4DxvcWIZVrWOOFkY_lo;`Ue53wDN(AfP&8bnsV;tZ2-7
zdQc|-D|B}wcI1(S|N7N;Q#m$8|BEA-1oTLa0$2hc2@43^Yk_hmEynz-*!Vvm9sNtv
z@_3j6<FcT+!Ovordshz~?|2<?rHp?MBxhYNuJJ5})2NDC@&=p_Cl5<KX8}F@=q~X>
zv+6cgW7%kUGqja8FG&!6bAuwDtULW8S=<J23)(Ox6(#L$dwul#W39DM`?w+|yM0qm
z_r}nNlBB6q<GQ~2-lrK~TM+kjgkpxh&$P`gg3tS2m*W2-{wA=3#R?NP-|zffH~+Qi
zpc~wKI#G+p<Jj2tZ4Y_B@Mr8Eh&5h+lexSm?xy9A>KNY76O2j7Y4AWf3ZAebkvsKW
zOW&g`or>%Qr%6X&y=WKNLXpj53+xZ@`jD2-I>es_Upo9EMq%b(6Y{B-4{&aTLAn%M
zc|Mv~D6K47(yMse1d4!$^+f%q`(sV(HSov^=e`O0dRH{RwPg`<IYf>Qo}|cene1Qo
zzu}X?S@4xEdEw-LBX<;+#IB&zY+q5RLz#_ZDAsuLxn<hEmhryE-gn;W&X)UyrMWfU
zom+Wc{_}p(Kaoe_ul$W`ynn)V3Hb&)>+U9i7{6q$$-xsRQheCECQ*8{_<+ZaP9li$
zXs_e($vhN_3KUqP=n~+0!#eeq*Vpwb+~bjoMx&a|-dJ|^=n;Pc6!HSq=@G4;eNJ}R
ztvx(^gDpREP+U&`{=wt&R6_4j$8wsI(z?3U(*O#7-(~rTnStJXQ7T*HO=_rsIzIBM
zG>xK-v%|9CF&`iAk%W5F;X(+*hx}!VJGj|J@o04B?<hs8oxF;q`T$iF>T>Cl<d+7i
z+R|wLb!O^_p1#3Tlz??*iWHdoj$RD~7KQdDT%KRz*|MM&D&L_psq3{Cptju)E%}L(
zN)3x}VOW`YAMbW;I*pX;@S?F)GO2Q3X1iArJhYZ!J+^}fWTUO>^Vk=Rm7F&Hu|+fr
zk`KqupL)RDU}5rPZ)DCxw$3BJ#-!ALmkgz#z+&U2>w6VQ6FkqMX;S@FvBu(OAS2@}
z%eSr^a|3?Wg+*n^*m?(|aETZ)`K}dlU^*yK%KAJkkk3KvNAlrRT8y9bsT`O4?)KS(
z3}|u+X{~*r)<i$GfUjy(t1{Q#$o6pTQrNMM*YbAUYTV^;GOm$EhT~;`r^)2Nud;_c
z;w%AT=p9n(qMz_4*!6>T9*4t*`h0ItjK==U4<8CbRqV=^G#3>`GrzkIvR>ZvTgC9d
zr<wP@uC{(^lCQCe;L2fKztt<?opj)GNh6`fl6i5dow%-Hee(}z%RJ9R#q)M1T9QW#
z@+0aN88x7zjOL@EH{r-ru#4JD5g%L6Nk{Rog-i?-Ag1jtT(WK>fUK@mX)-GutevtG
z-<RNfQ+s_eaC^fcUh@0V^18}&Eb(3&7r$zx233Vzamp~Y$cLI??9;F3FUPMQe8m+}
zKzel|8SCslD@f*8W&B%Q)khL8hvyDf=YvyzpDhRLvX#<LAzEU4>>i$PK`eH=Rv0~)
z7XvJ8Oj!IlqNbwK1(f;RGU+>v-k7lonGUditVI|2M}I9W1ce>rZidYVf9ak|o?5$z
z<pXq(JP7uitlve6LEL7{dg#&d4-`zyNnh|Dszj<uQK84?eq?_@7W6P&Sc<#8#X90q
zl`_)6`BVJZ(T+3Y5xWV?q7iApkb~yr?<-Dm;kZn`<V-#qLI0Z=zGV-w_7B#C_wT<i
zh}QOyC^m+Yf<qbwOUJE7_B=*gHWtQ5Uqu)R^7)#qHe%yF!}~-27Lp@FEl070OzQ1>
zd0fhk9ee{R7L#3<;^Vu!j!DnkG`;nWV@~)uEYoa(!ACmNqFZlQ^HExmx)@>Fseu=*
z&PJ(qT^ykP>{1JVEU1gAfKY@Y+z^E2ba#ncOMAN|#_2XKR!UEd+ZY^AmdBsnvlKGz
zMTj!e&(pKC7uTKt<MqbNP%qIs_YHIT>ud&7m#|1^pYEJ*kMw8%3=pf(2ywi+fB-0l
zagvu1$%Q2l^2Eb0WDjun==aS3t~gx?Nna)aiO@KqVELz)UB&zXMqbvbt7<i=1=p%l
z-Max(X)aoJ=vv{maXx6>VCabVlD^IDv;(=tD`a^ZJZZ-3LEZOAXbMhVaa*e(#Dafb
zem=#f-_p$LCG}9+jY{VjcfCt))XX&#>gUDmXm=v@FS`#;PUcQd^dKgb#ttkpC6BC)
zuH}Rg1R{PRSce=6Ht*umEMgNBhlzWxmfkz=^vkYkSJl-JHR8Pcv{|eYHN^2WL&Dq2
zmQlhjaCFiMO?#(Wv05a|NHv1{RF<q^WaXN|@iTOxH{fRR%z^ryTSh9rLWCwuOrS}_
z2tQT`H0eovV&KoNcdgZv`W;oGl8~<Rnh}MB-KoaF@dE~OasR7`5sk^oT7D4j@sIE0
zAuT)z7yf!0I)0sg-7&|S$n0zlRjkTCy%sj-qcUuYT|qyrrroo%_mDImcO+U%$G3M%
zzPbmCb+uVYMHUdKcf8S?1%zwT)xv}8mG``TNokB-SBil%Df$C>oxig!+6b0><-RFI
z!DEuUj6W^*AqdY%4T*Xj?9rMFvoYeSG*pvu7g|?0t(hdiRYgKLArP#{QQ456WqF?X
zh^2cttY;3t9?FlPeA(LrJ}Ro0F$<RW$REB?rM;ptV-W@RQW)iHK$VrTGU5jos2*E-
z=V&OFUqdaMcoYdML>#s6<m=^g>88t}!@Le{+#>v*Ep)rw<MX`uz77~06f^bi)~l77
zo|pCZ#r)F#X|YHkIE5wPv}jeo7zvHB2)@Q4q2L~{rAvv{oPQp5L6eHyjyAwMutO{k
zM8z5VoA1V5QKyd9sv7_PT__KBSvsu#kTIt&szb=9lWNbGv8qu>=vT5#GFn!F+Jzsh
z6qnqbGP?xS9cTICOKyE+)6D!qLOrQWOu-E+6r!!7?G%H~%SUK}yStY})sW@#Q}S+B
z*O)_Fd_$9A?Vl$vGz{IsA_E_t@!cnN++O~~lEj<<^WmQOZF_;RjMNy$x?I~OuM2Qu
zb%O=>C1}0VnYG2H1ff_;XXaYQOY=%|u?+n}mm3$eHM|fE)0V4~7;cx#i|h(a-iyEl
z<s(jP)o|~ZRnzMX8Cn|}zkohn-E);B-eR6H%a~i1hLp;%utFewQEHI2v9A{3>=48k
z+$Bw>!%_6;iC@p#YEfoUEBNdW?a_;lqe0Ev&OaI~s^&W<(E}F(j224NNLM1g_g1pE
zliD^mBeVY8r!snHaWNe590Q^6Lb{I5NG;sykO98TxC4iaCd>Sb?nc_f<H~*1T33_(
ztC}tZ40U>C)K<?3o>Wo6i?X+wMekH<$Z+nE*<(~WoII<ot@jJa-l&|6`&X>p9!dML
z(?lx~1M`RH5)trMfSU{yqsRbYZ=6nGSAAyLbEb0`0SN9_cRr*4hZF5T2Z#UeTbs7x
z1$>C9K8?4JUrRXINBoWwfxu@41zPR}zv+5V9M>85rcE-Wp^2cOQi*DHZ<k;tu_OG+
z7c2)$V`EWg8BRFW@^3)_M2twFfx*MYA;(XjK7;s%M0m`HB2*<~gas%^B9pm3T3HMv
z{og>@;!5;BDW_^@OFcuhGay26Oz+TB{TD-H#gw#~kB^TjVHphY-@WO2c6{;C#%7Di
z&-XsZUk7FIG4H?h_}^uLTmW3Ia5%emX<69=z&Olb;1Q3QZwZ;vRoD~Lj+dXChHI+L
zZ0r^R&N>D&#^(ref$JEP7o;ixx&4OM_x*}z#*?kT#m74@@4>%5n1Ate{%UFhL^ci9
zkxe3^J;R7A^8Ys8n-s!^v`j3RssHDi<RxQP9~~S7Qd0qZhk>y;q;3A|_eGt1Y+?IU
zJTSt;IRCE(McL18Zh-mJk>@m`d9#$v@Wu8r;D+S=4{pf+g*o^?c_;s4>&(BE<d>hf
z`Ueu?$m2d0)_=^DDl+XP3@-XZ5(uQpUz6ga=%L6g>72YOc*x)RkMqLf#IG~;ViI<(
z$o5Z%SP2OSuRrCY#Q(&ieZuKNTkYJ70`%qInm5nAJ4N^MSrp&@>+Jjz+UjP~b)e8H
zDk=sblK<T5`5$lhzcPmY&#e<;xNti0V<Kdy5Bd+6foq<H)2fM&3tXr3`nycqZO^`w
z{`-1!OEcIIO8r%71>s;|pum7w%Z$7Dx1qJwlql&fd|76B&f)`=b*Iyz#0<Jb{o%Yh
z(4)GsNfVewZ@AKzk_N_^VPkE_G_0LoBS~w+1VGlnCf)U6yxSJL9I_zok?xv3FsolM
zFaX}D#n)z#C@1a^6Iq6ZbNGxio`-;dfDnu2qIroTgQ;1>-MhB6MU~LgXgI%lc0}Ld
z%I^3*jFm@Vm!3$)9?sBb0hjBQ?^Ovm5s01voksl;ZXCc3Z62YRha;%xJbk4$e08iU
zZ~m>|ap+%LEjkvYlcIY9E1va1^g2#=eZ}c)YHn<<WMxU@Dxv+l5wU2nn*Z$SM_b$L
zfNO_`S*4vJ4fj4#ENI|P@apOMhT=4$j(hU+fekk;{g$>iGzxjjuZLFvBMUhOoVu5n
zM*&=je8jI$EH!|ujFew->ePjQ^;ffWlLC)j((PGUwTxuPi=<x!p!8zojW`1n{p<d3
zS8D5r<rI5l38$JteQbG>>UcYX;M5&YgF}z4$PWmI5a2rl0{?z5fRcNHi<<QnS(xO5
z+eQ_Nt2iI83|Zc(fHOIp$?{1;6l-&R%51KlMbt~M&uv)B(sEjPb$@(}_&`<2DNnCr
z%hW`=lc{zv#U28IU`QM^muV9jUp3y_#utI8+B!IpNU84Qj|yZS-KjjrSn)P3DDF$m
zq)7D^?V6dMy16;Q)KZ6~6&Kk%5MJA_BSvuXo+jjH_<tYV<W6?;nBOV7G1XH3R_&Z%
zt^-6c!F;nMjUL?ljT{70prcJz1TMIl-O^dz*<Fi?fx53X1zRn#%XpC?CVFUc{O63c
zA^X~Vvd=NBUhpMphzV(k`J#@8^6E$h5E;Pw4k&L1$%OVAR;1YzMRW&c5knM>lsu0x
zRzlo0ZYiXS<4?C~w1}`qzQhVU*uVR$E;O@`XkJWLV>EMa)bHs70-G#J!u}_2a5H@Z
zi!^n(r_ZUF0;8dFZePoZE9-66il7unv<s%z<nTW$+>7wKo<Y)HdO_g(4WhJ7ND+6K
Z+%~AO2T||EfLw_oC#4K2dutr{e*v0KlNJB~

literal 0
HcmV?d00001

diff --git a/search.json b/search.json
index 2af84ca..79e4b73 100644
--- a/search.json
+++ b/search.json
@@ -181,12 +181,82 @@
     "section": "Arrow for Improving Those Sluggish Worklows",
     "text": "Arrow for Improving Those Sluggish Worklows\n\na “drop-in” for many dplyr workflows (Table or Dataset)\nworks when your tabular data get too big for your RAM (Dataset)\nprovides tools for re-engineering data storage for better performance (arrow::write_dataset())\n\n\nLot’s of ways to speed up sluggish workflows e.g. writing more performant tidyverse code, use other data frame libraries like data.table or polars, use duckDB or other databases, Spark + splarklyr … However, Arrow offers some attractive features for tackling this challenge, especially for dplyr users.\n\n\n\n\n🔗 posit.io/arrow"
   },
+  {
+    "objectID": "materials/4_data_manipulation_2.html#what-if-a-function-binding-doesnt-exist---revisited",
+    "href": "materials/4_data_manipulation_2.html#what-if-a-function-binding-doesnt-exist---revisited",
+    "title": "Big Data in R with Arrow",
+    "section": "What if a function binding doesn’t exist - revisited!",
+    "text": "What if a function binding doesn’t exist - revisited!\n\nOption 1 - find a workaround\nOption 2 - user-defined functions (UDFs)"
+  },
+  {
+    "objectID": "materials/4_data_manipulation_2.html#why-use-a-udf",
+    "href": "materials/4_data_manipulation_2.html#why-use-a-udf",
+    "title": "Big Data in R with Arrow",
+    "section": "Why use a UDF?",
+    "text": "Why use a UDF?\nSometimes it’s hard to find a workaround\n\nnyc_taxi |&gt;\n  mutate(duration_minutes = difftime(pickup_datetime, dropoff_datetime, units = \"minutes\")) |&gt;\n  select(pickup_datetime, dropoff_datetime, duration_minutes) |&gt;\n  head() |&gt;\n  collect()\n\nError: In difftime(pickup_datetime, dropoff_datetime, units = \"minutes\"), `difftime()` with units other than `secs` not supported in Arrow\nCall collect() first to pull data into R."
+  },
+  {
+    "objectID": "materials/4_data_manipulation_2.html#user-defined-functions---definition",
+    "href": "materials/4_data_manipulation_2.html#user-defined-functions---definition",
+    "title": "Big Data in R with Arrow",
+    "section": "User-defined functions - definition",
+    "text": "User-defined functions - definition\n\nregister_scalar_function(\n  name = \"time_diff_minutes\",\n  function(context, pickup, dropoff) {\n    difftime(dropoff, pickup, units = \"mins\") |&gt;\n      round() |&gt;\n      as.integer()\n  },\n  in_type = schema(\n    pickup = timestamp(unit = \"ms\"),\n    dropoff = timestamp(unit = \"ms\")\n  ),\n  out_type = int32(),\n  auto_convert = TRUE\n)"
+  },
+  {
+    "objectID": "materials/4_data_manipulation_2.html#user-defined-functions---definition-1",
+    "href": "materials/4_data_manipulation_2.html#user-defined-functions---definition-1",
+    "title": "Big Data in R with Arrow",
+    "section": "User-defined functions - definition",
+    "text": "User-defined functions - definition\nGive the function a name\n\nregister_scalar_function(\n  name = \"time_diff_minutes\",\n  function(context, pickup, dropoff) {\n    difftime(dropoff, pickup, units = \"mins\") |&gt;\n      round() |&gt;\n      as.integer()\n  },\n  in_type = schema(\n    pickup = timestamp(unit = \"ms\"),\n    dropoff = timestamp(unit = \"ms\")\n  ),\n  out_type = int32(),\n  auto_convert = TRUE\n)"
+  },
+  {
+    "objectID": "materials/4_data_manipulation_2.html#user-defined-functions---definition-2",
+    "href": "materials/4_data_manipulation_2.html#user-defined-functions---definition-2",
+    "title": "Big Data in R with Arrow",
+    "section": "User-defined functions - definition",
+    "text": "User-defined functions - definition\nDefine the body of the function - first argument must be context\n\nregister_scalar_function(\n  name = \"time_diff_minutes\",\n  function(context, pickup, dropoff) {\n    difftime(dropoff, pickup, units = \"mins\") |&gt;\n      round() |&gt;\n      as.integer()\n  },\n  in_type = schema(\n    pickup = timestamp(unit = \"ms\"),\n    dropoff = timestamp(unit = \"ms\")\n  ),\n  out_type = int32(),\n  auto_convert = TRUE\n)"
+  },
+  {
+    "objectID": "materials/4_data_manipulation_2.html#user-defined-functions---definition-3",
+    "href": "materials/4_data_manipulation_2.html#user-defined-functions---definition-3",
+    "title": "Big Data in R with Arrow",
+    "section": "User-defined functions - definition",
+    "text": "User-defined functions - definition\nDefine the schema of the input arguments\n\nregister_scalar_function(\n  name = \"time_diff_minutes\",\n  function(context, pickup, dropoff) {\n    difftime(dropoff, pickup, units = \"mins\") |&gt;\n      round() |&gt;\n      as.integer()\n  },\n  in_type = schema(\n    pickup = timestamp(unit = \"ms\"),\n    dropoff = timestamp(unit = \"ms\")\n  ),\n  out_type = int32(),\n  auto_convert = TRUE\n)"
+  },
+  {
+    "objectID": "materials/4_data_manipulation_2.html#user-defined-functions---definition-4",
+    "href": "materials/4_data_manipulation_2.html#user-defined-functions---definition-4",
+    "title": "Big Data in R with Arrow",
+    "section": "User-defined functions - definition",
+    "text": "User-defined functions - definition\nDefine the data type of the output\n\nregister_scalar_function(\n  name = \"time_diff_minutes\",\n  function(context, pickup, dropoff) {\n    difftime(dropoff, pickup, units = \"mins\") |&gt;\n      round() |&gt;\n      as.integer()\n  },\n  in_type = schema(\n    pickup = timestamp(unit = \"ms\"),\n    dropoff = timestamp(unit = \"ms\")\n  ),\n  out_type = int32(),\n  auto_convert = TRUE\n)"
+  },
+  {
+    "objectID": "materials/4_data_manipulation_2.html#user-defined-functions---usage",
+    "href": "materials/4_data_manipulation_2.html#user-defined-functions---usage",
+    "title": "Big Data in R with Arrow",
+    "section": "User-defined functions - usage",
+    "text": "User-defined functions - usage\n\nnyc_taxi |&gt;\n  mutate(duration_minutes = time_diff_minutes(pickup_datetime, dropoff_datetime)) |&gt;\n  select(pickup_datetime, dropoff_datetime, duration_minutes) |&gt;\n  head() |&gt;\n  collect()\n\n# A tibble: 6 × 3\n  pickup_datetime     dropoff_datetime    duration_minutes\n  &lt;dttm&gt;              &lt;dttm&gt;                         &lt;int&gt;\n1 2012-11-02 23:40:32 2012-11-02 23:58:16               18\n2 2012-11-02 23:40:41 2012-11-02 23:45:56                5\n3 2012-11-02 23:40:50 2012-11-02 23:49:20                8\n4 2012-11-02 23:40:52 2012-11-02 23:46:15                5\n5 2012-11-02 23:41:00 2012-11-02 23:44:00                3\n6 2012-11-02 23:41:00 2012-11-02 23:45:00                4"
+  },
+  {
+    "objectID": "materials/4_data_manipulation_2.html#your-turn",
+    "href": "materials/4_data_manipulation_2.html#your-turn",
+    "title": "Big Data in R with Arrow",
+    "section": "Your Turn",
+    "text": "Your Turn\n\nWrite a user-defined function which wraps the stringr function str_replace_na(), and use it to replace any NA values in the vendor_name column with the string “No vendor” instead.\n\n➡️ Data Manipulation Part I Exercises Page"
+  },
+  {
+    "objectID": "materials/4_data_manipulation_2.html#summary",
+    "href": "materials/4_data_manipulation_2.html#summary",
+    "title": "Big Data in R with Arrow",
+    "section": "Summary",
+    "text": "Summary\n\nYou can use UDFs to create your own bindings when they don’t exist!"
+  },
   {
     "objectID": "materials/4_data_manipulation_2.html#joining-a-reference-table",
     "href": "materials/4_data_manipulation_2.html#joining-a-reference-table",
     "title": "Big Data in R with Arrow",
     "section": "Joining a reference table",
-    "text": "Joining a reference table\n\nvendors &lt;- tibble::tibble(\n  code = c(\"VTS\", \"CMT\", \"DDS\"),\n  full_name = c(\n    \"Verifone Transportation Systems\",\n    \"Creative Mobile Technologies\",\n    \"Digital Dispatch Systems\"\n  )\n)\n\nnyc_taxi |&gt;\n  left_join(vendors, by = c(\"vendor_name\" = \"code\")) |&gt;\n  select(vendor_name, full_name, pickup_datetime) |&gt;\n  head(3) |&gt;\n  collect()\n\n# A tibble: 3 × 3\n  vendor_name full_name                    pickup_datetime    \n  &lt;chr&gt;       &lt;chr&gt;                        &lt;dttm&gt;             \n1 CMT         Creative Mobile Technologies 2012-11-03 10:08:31\n2 CMT         Creative Mobile Technologies 2012-11-03 10:08:35\n3 CMT         Creative Mobile Technologies 2012-11-03 10:08:35"
+    "text": "Joining a reference table\n\nvendors &lt;- tibble::tibble(\n  code = c(\"VTS\", \"CMT\", \"DDS\"),\n  full_name = c(\n    \"Verifone Transportation Systems\",\n    \"Creative Mobile Technologies\",\n    \"Digital Dispatch Systems\"\n  )\n)\n\nnyc_taxi |&gt;\n  left_join(vendors, by = c(\"vendor_name\" = \"code\")) |&gt;\n  select(vendor_name, full_name, pickup_datetime) |&gt;\n  head(3) |&gt;\n  collect()\n\n# A tibble: 3 × 3\n  vendor_name full_name                    pickup_datetime    \n  &lt;chr&gt;       &lt;chr&gt;                        &lt;dttm&gt;             \n1 CMT         Creative Mobile Technologies 2012-01-27 23:35:26\n2 CMT         Creative Mobile Technologies 2012-01-27 14:56:04\n3 CMT         Creative Mobile Technologies 2012-01-27 16:12:50"
   },
   {
     "objectID": "materials/4_data_manipulation_2.html#traps-for-the-unwary",
@@ -200,7 +270,7 @@
     "href": "materials/4_data_manipulation_2.html#why-didnt-this-work",
     "title": "Big Data in R with Arrow",
     "section": "Why didn’t this work?",
-    "text": "Why didn’t this work?\n\nnyc_taxi |&gt;\n  left_join(nyc_taxi_zones, by = c(\"pickup_location_id\" = \"location_id\")) |&gt;\n  collect()\n\nError in `compute.arrow_dplyr_query()`:\n! Invalid: Incompatible data types for corresponding join field keys: FieldRef.Name(pickup_location_id) of type int64 and FieldRef.Name(location_id) of type int32"
+    "text": "Why didn’t this work?\n\nnyc_taxi |&gt;\n  left_join(nyc_taxi_zones, by = c(\"pickup_location_id\" = \"location_id\")) |&gt;\n  collect()\n\nError in `compute.arrow_dplyr_query()`:\n! Invalid: Incompatible data types for corresponding join field keys: FieldRef.Name(pickup_location_id) of type int64 and FieldRef.Name(location_id) of type int32\n/home/nic/arrow/cpp/src/arrow/acero/hash_join_node.cc:131  ValidateSchemas(join_type, left_schema, left_keys, left_output, right_schema, right_keys, right_output, left_field_name_suffix, right_field_name_suffix)\n/home/nic/arrow/cpp/src/arrow/acero/hash_join_node.cc:724  schema_mgr-&gt;Init( join_options.join_type, left_schema, join_options.left_keys, join_options.left_output, right_schema, join_options.right_keys, join_options.right_output, join_options.filter, join_options.output_suffix_for_left, join_options.output_suffix_for_right)"
   },
   {
     "objectID": "materials/4_data_manipulation_2.html#schema-for-the-nyc_taxi-table",
@@ -242,7 +312,7 @@
     "href": "materials/4_data_manipulation_2.html#join-and-cross-tabulate",
     "title": "Big Data in R with Arrow",
     "section": "Join and cross-tabulate",
-    "text": "Join and cross-tabulate\n\nlibrary(tictoc)\n\ntic()\nborough_counts &lt;- nyc_taxi |&gt; \n  left_join(pickup) |&gt;\n  left_join(dropoff) |&gt;\n  count(pickup_borough, dropoff_borough) |&gt;\n  arrange(desc(n)) |&gt;\n  collect()\ntoc()\n\n133.705 sec elapsed\n\n\n\n2-3 minutes to join twice and cross-tabulate on non-partition variables, with 1.15 billion rows of data 🙂"
+    "text": "Join and cross-tabulate\n\nlibrary(tictoc)\n\ntic()\nborough_counts &lt;- nyc_taxi |&gt; \n  left_join(pickup) |&gt;\n  left_join(dropoff) |&gt;\n  count(pickup_borough, dropoff_borough) |&gt;\n  arrange(desc(n)) |&gt;\n  collect()\ntoc()\n\n1171.556 sec elapsed\n\n\n\n2-3 minutes to join twice and cross-tabulate on non-partition variables, with 1.15 billion rows of data 🙂"
   },
   {
     "objectID": "materials/4_data_manipulation_2.html#the-results",
@@ -252,12 +322,19 @@
     "text": "The results\n\nborough_counts\n\n# A tibble: 50 × 3\n   pickup_borough dropoff_borough         n\n   &lt;chr&gt;          &lt;chr&gt;               &lt;int&gt;\n 1 &lt;NA&gt;           &lt;NA&gt;            732357953\n 2 Manhattan      Manhattan       351198872\n 3 Queens         Manhattan        14440705\n 4 Manhattan      Queens           13052517\n 5 Manhattan      Brooklyn         11180867\n 6 Queens         Queens            7440356\n 7 Unknown        Unknown           4491811\n 8 Queens         Brooklyn          3662324\n 9 Brooklyn       Brooklyn          3550480\n10 Manhattan      Bronx             2071830\n# ℹ 40 more rows"
   },
   {
-    "objectID": "materials/4_data_manipulation_2.html#your-turn",
-    "href": "materials/4_data_manipulation_2.html#your-turn",
+    "objectID": "materials/4_data_manipulation_2.html#your-turn-1",
+    "href": "materials/4_data_manipulation_2.html#your-turn-1",
     "title": "Big Data in R with Arrow",
     "section": "Your Turn",
     "text": "Your Turn\n\nHow many taxi pickups were recorded in 2019 from the three major airports covered by the NYC Taxis data set (JFK, LaGuardia, Newark)?\n\n➡️ Data Manipulation Part I Exercises Page"
   },
+  {
+    "objectID": "materials/4_data_manipulation_2.html#summary-1",
+    "href": "materials/4_data_manipulation_2.html#summary-1",
+    "title": "Big Data in R with Arrow",
+    "section": "Summary",
+    "text": "Summary\n\nYou can join arrow tables and datasets to R data frames and arrow tables\nThe arrow data type of join keys must always match"
+  },
   {
     "objectID": "materials/4_data_manipulation_2.html#what-are-window-functions",
     "href": "materials/4_data_manipulation_2.html#what-are-window-functions",
@@ -294,32 +371,18 @@
     "text": "Window functions - via duckdb\n\nfare_by_year |&gt;\n  group_by(year) |&gt;\n  to_duckdb() |&gt;\n  mutate(mean_fare = mean(fare_amount)) |&gt; \n  to_arrow() |&gt;\n  arrange(desc(fare_amount)) |&gt;\n  collect()\n\n# A tibble: 30,902,618 × 3\n    year fare_amount mean_fare\n   &lt;int&gt;       &lt;dbl&gt;     &lt;dbl&gt;\n 1  2021     818283.      13.5\n 2  2021     398466.      13.5\n 3  2021     395854.      13.5\n 4  2021       6965       13.5\n 5  2021       6960.      13.5\n 6  2021       6010       13.5\n 7  2021       5954       13.5\n 8  2021       4969       13.5\n 9  2021       3555.      13.5\n10  2021       3009       13.5\n# ℹ 30,902,608 more rows"
   },
   {
-    "objectID": "materials/4_data_manipulation_2.html#your-turn-1",
-    "href": "materials/4_data_manipulation_2.html#your-turn-1",
+    "objectID": "materials/4_data_manipulation_2.html#your-turn-2",
+    "href": "materials/4_data_manipulation_2.html#your-turn-2",
     "title": "Big Data in R with Arrow",
     "section": "Your Turn",
     "text": "Your Turn\n\nHow many trips in September 2019 had a longer than average distance for that month?\n\n➡️ Data Manipulation Part I Exercises Page"
   },
   {
-    "objectID": "materials/4_data_manipulation_2.html#custom-functions",
-    "href": "materials/4_data_manipulation_2.html#custom-functions",
-    "title": "Big Data in R with Arrow",
-    "section": "Custom functions",
-    "text": "Custom functions\n\nNot officially supported\nWorks for simple operations but not with bindings"
-  },
-  {
-    "objectID": "materials/4_data_manipulation_2.html#custom-functions---supported",
-    "href": "materials/4_data_manipulation_2.html#custom-functions---supported",
-    "title": "Big Data in R with Arrow",
-    "section": "Custom functions - supported",
-    "text": "Custom functions - supported\n\nmillions &lt;- function(x) x / 10^6\n\nnyc_taxi |&gt;\n  group_by(vendor_name) |&gt;\n  summarise(trips = n()) |&gt;\n  mutate(\n    trips_mil = millions(trips)\n  ) |&gt;\n  collect()\n\n# A tibble: 3 × 3\n  vendor_name     trips trips_mil\n  &lt;chr&gt;           &lt;int&gt;     &lt;dbl&gt;\n1 CMT         530173884    530.  \n2 VTS         617481207    617.  \n3 &lt;NA&gt;          2697575      2.70"
-  },
-  {
-    "objectID": "materials/4_data_manipulation_2.html#custom-functions---not-supported",
-    "href": "materials/4_data_manipulation_2.html#custom-functions---not-supported",
+    "objectID": "materials/4_data_manipulation_2.html#summary-2",
+    "href": "materials/4_data_manipulation_2.html#summary-2",
     "title": "Big Data in R with Arrow",
-    "section": "Custom functions - not supported",
-    "text": "Custom functions - not supported\n\nmorning &lt;- function(x) ifelse(lubridate::am(x), \"morning\", \"afternoon\")\nnyc_taxi |&gt;\n  group_by(morning(pickup_datetime)) |&gt;\n  count() |&gt;\n  collect()\n\nError: Expression morning(pickup_datetime) not supported in Arrow\nCall collect() first to pull data into R.\n\n\n\nrecommendation: write code as dplyr expressions instead of functions, or look up docs on user-defined functions for datasets (see ?register_scalar_function)\n\n\n\n\n🔗 posit.io/arrow"
+    "section": "Summary",
+    "text": "Summary\n\nWindow functions in arrow can be achieved via joins or passing data to and from duckdb\n\n\n\n\n🔗 posit.io/arrow"
   },
   {
     "objectID": "materials/3_data_engineering.html#data-engineering",
@@ -580,6 +643,13 @@
     "section": "R for Data Science (2e)",
     "text": "R for Data Science (2e)\n\n\n\n\n\nChapter 23: Arrow\n\nhttps://r4ds.hadley.nz/\n\n\n\n\n\n🔗 posit.io/arrow"
   },
+  {
+    "objectID": "materials/2_data_manipulation_1.html#goals",
+    "href": "materials/2_data_manipulation_1.html#goals",
+    "title": "Big Data in R with Arrow",
+    "section": "Goals",
+    "text": "Goals\nAvoiding these! But…don’t worry!"
+  },
   {
     "objectID": "materials/2_data_manipulation_1.html#arrow-datasets",
     "href": "materials/2_data_manipulation_1.html#arrow-datasets",
@@ -648,7 +718,7 @@
     "href": "materials/2_data_manipulation_1.html#calling-nrow-to-see-how-much-data",
     "title": "Big Data in R with Arrow",
     "section": "calling nrow() to see how much data",
-    "text": "calling nrow() to see how much data\n\nnyc_taxi |&gt;\n  filter(year %in% 2017:2021) |&gt;\n  nrow()\n\n[1] 356236190"
+    "text": "calling nrow() to see how much data\n\nnyc_taxi |&gt;\n  filter(year %in% 2017:2021) |&gt;\n  nrow()\n\nCalled from: dim.arrow_dplyr_query(x)\ndebug: rows &lt;- Scanner$create(x)$CountRows()\ndebug: c(rows, cols)\n\n\n[1] 356236190"
   },
   {
     "objectID": "materials/2_data_manipulation_1.html#calling-nrow-doesnt-work-with-intermediate-step",
@@ -676,14 +746,14 @@
     "href": "materials/2_data_manipulation_1.html#use-head-then-collect-to-preview-output-for-large-queries",
     "title": "Big Data in R with Arrow",
     "section": "use head() then collect() to preview output for large queries",
-    "text": "use head() then collect() to preview output for large queries\nHow much were fares in GBP (£)?\n\nfares_pounds &lt;- nyc_taxi |&gt;\n  filter(year %in% 2012:2015) |&gt;\n  mutate(\n    fare_amount_pounds = fare_amount * 0.79\n  ) |&gt;\n  select(fare_amount, fare_amount_pounds)\n\nHow many rows?\n\nfares_pounds |&gt;\n  nrow()\n\n[1] 662951433"
+    "text": "use head() then collect() to preview output for large queries\nHow much were fares in GBP (£)?\n\nfares_pounds &lt;- nyc_taxi |&gt;\n  mutate(\n    fare_amount_pounds = fare_amount * 0.79\n  )\n\nHow many rows?\n\nfares_pounds |&gt;\n  nrow()\n\n[1] 1150352666"
   },
   {
-    "objectID": "materials/2_data_manipulation_1.html#use-head-then-collect-to-preview-output",
-    "href": "materials/2_data_manipulation_1.html#use-head-then-collect-to-preview-output",
+    "objectID": "materials/2_data_manipulation_1.html#use-head-select-filter-and-collect-to-preview-results",
+    "href": "materials/2_data_manipulation_1.html#use-head-select-filter-and-collect-to-preview-results",
     "title": "Big Data in R with Arrow",
-    "section": "use head() then collect() to preview output",
-    "text": "use head() then collect() to preview output\n\nfares_pounds |&gt;\n  head() |&gt;\n  collect()\n\n# A tibble: 6 × 2\n  fare_amount fare_amount_pounds\n        &lt;dbl&gt;              &lt;dbl&gt;\n1        29.7              23.5 \n2         9.3               7.35\n3         4.1               3.24\n4         4.5               3.56\n5         4.5               3.56\n6         4.1               3.24"
+    "section": "Use head(), select(), filter(), and collect() to preview results",
+    "text": "Use head(), select(), filter(), and collect() to preview results\n\nnyc_taxi |&gt;\n  filter(year == 2020) |&gt;\n  mutate(fare_pounds = fare_amount * 0.79) |&gt;\n  select(fare_amount, fare_pounds) |&gt;\n  head() |&gt;\n  collect()\n\n# A tibble: 6 × 2\n  fare_amount fare_pounds\n        &lt;dbl&gt;       &lt;dbl&gt;\n1         8          6.32\n2        17         13.4 \n3         6.5        5.14\n4         7          5.53\n5         6.5        5.14\n6        42         33.2"
   },
   {
     "objectID": "materials/2_data_manipulation_1.html#use-across-to-transform-data-in-multiple-columns",
@@ -699,6 +769,13 @@
     "section": "use across() to transform data in multiple columns",
     "text": "use across() to transform data in multiple columns\n\ntaxis_gbp |&gt;\n  select(contains(\"amount\")) |&gt;\n  head() |&gt;\n  collect()\n\n# A tibble: 6 × 8\n  fare_amount tip_amount tolls_amount total_amount fare_amount_pounds\n        &lt;dbl&gt;      &lt;dbl&gt;        &lt;dbl&gt;        &lt;dbl&gt;              &lt;dbl&gt;\n1        29.7       6.04            0        36.2               23.5 \n2         9.3       0               0         9.8                7.35\n3         4.1       1.38            0         5.98               3.24\n4         4.5       1               0         6                  3.56\n5         4.5       0               0         5.5                3.56\n6         4.1       0               0         5.6                3.24\n# ℹ 3 more variables: tip_amount_pounds &lt;dbl&gt;, tolls_amount_pounds &lt;dbl&gt;,\n#   total_amount_pounds &lt;dbl&gt;"
   },
+  {
+    "objectID": "materials/2_data_manipulation_1.html#summary",
+    "href": "materials/2_data_manipulation_1.html#summary",
+    "title": "Big Data in R with Arrow",
+    "section": "Summary",
+    "text": "Summary\n\nUse nrow() to work out how many rows of data your analyses will return\nUse compute() when you need to execute intermediate steps\nUse collect() to pull all of the data into your R session\nUse head(), select(), filter(), and collect() to preview results\nUse across() to manipulate data in multiple columns at once"
+  },
   {
     "objectID": "materials/2_data_manipulation_1.html#example---slice",
     "href": "materials/2_data_manipulation_1.html#example---slice",
@@ -725,14 +802,14 @@
     "href": "materials/2_data_manipulation_1.html#or-call-collect-first",
     "title": "Big Data in R with Arrow",
     "section": "Or call collect() first",
-    "text": "Or call collect() first\n\nlong_rides_2021 |&gt;\n  collect() |&gt;\n  slice(1:3)\n\n# A tibble: 3 × 3\n  pickup_datetime      year trip_distance\n  &lt;dttm&gt;              &lt;int&gt;         &lt;dbl&gt;\n1 2021-01-03 09:01:26  2021          216.\n2 2021-01-03 11:36:52  2021          268.\n3 2021-10-02 15:04:53  2021          188."
+    "text": "Or call collect() first\n\nlong_rides_2021 |&gt;\n  collect() |&gt;\n  slice(1:3)\n\n# A tibble: 3 × 3\n  pickup_datetime      year trip_distance\n  &lt;dttm&gt;              &lt;int&gt;         &lt;dbl&gt;\n1 2021-01-06 07:27:55  2021          271.\n2 2021-01-03 09:01:26  2021          216.\n3 2021-01-03 11:36:52  2021          268."
   },
   {
     "objectID": "materials/2_data_manipulation_1.html#tidyr-functions---pivot",
     "href": "materials/2_data_manipulation_1.html#tidyr-functions---pivot",
     "title": "Big Data in R with Arrow",
     "section": "tidyr functions - pivot",
-    "text": "tidyr functions - pivot\n\nlibrary(tidyr)\n\nnyc_taxi |&gt; \n  group_by(vendor_name) |&gt;\n  summarise(max_fare = max(fare_amount), min_fare = min(fare_amount)) |&gt;\n  pivot_longer(!vendor_name, names_to = \"metric\") |&gt; \n  collect()\n\nError in UseMethod(\"pivot_longer\"): no applicable method for 'pivot_longer' applied to an object of class \"arrow_dplyr_query\""
+    "text": "tidyr functions - pivot\n\nlibrary(tidyr)\n\nnyc_taxi |&gt; \n  group_by(vendor_name) |&gt;\n  summarise(max_fare = max(fare_amount)) |&gt;\n  pivot_longer(!vendor_name, names_to = \"metric\") |&gt; \n  collect()\n\nError in UseMethod(\"pivot_longer\"): no applicable method for 'pivot_longer' applied to an object of class \"arrow_dplyr_query\""
   },
   {
     "objectID": "materials/2_data_manipulation_1.html#duckdb",
@@ -746,7 +823,7 @@
     "href": "materials/2_data_manipulation_1.html#tidyr-functions---pivot-with-duckdb",
     "title": "Big Data in R with Arrow",
     "section": "tidyr functions - pivot with duckdb!",
-    "text": "tidyr functions - pivot with duckdb!\n\nlibrary(duckdb)\n\nnyc_taxi |&gt; \n  group_by(vendor_name) |&gt;\n  summarise(max_fare = max(fare_amount), min_fare = min(fare_amount)) |&gt;\n  to_duckdb() |&gt; # send data to duckdb\n  pivot_longer(!vendor_name, names_to = \"metric\") |&gt; \n  to_arrow() |&gt; # return data back to arrow\n  collect()\n\n# A tibble: 6 × 3\n  vendor_name metric     value\n  &lt;chr&gt;       &lt;chr&gt;      &lt;dbl&gt;\n1 CMT         max_fare 998310.\n2 VTS         max_fare  10000.\n3 &lt;NA&gt;        max_fare   3555.\n4 CMT         min_fare   -652.\n5 VTS         min_fare  -1856 \n6 &lt;NA&gt;        min_fare   -150.\n\n\n\n\n\n\n\n\nRequires arrow 13.0.0\n\n\nThis code requires arrow 13.0.0 or above to run, due to a bug which was fixed in this version"
+    "text": "tidyr functions - pivot with duckdb!\n\nlibrary(duckdb)\n\nnyc_taxi |&gt; \n  group_by(vendor_name) |&gt;\n  summarise(max_fare = max(fare_amount)) |&gt;\n  to_duckdb() |&gt; # send data to duckdb\n  pivot_longer(!vendor_name, names_to = \"metric\") |&gt; \n  to_arrow() |&gt; # return data back to arrow\n  collect()\n\n# A tibble: 3 × 3\n  vendor_name metric     value\n  &lt;chr&gt;       &lt;chr&gt;      &lt;dbl&gt;\n1 CMT         max_fare 998310.\n2 VTS         max_fare  10000.\n3 &lt;NA&gt;        max_fare   3555.\n\n\n\n\n\n\n\n\nRequires arrow 13.0.0\n\n\nThis code requires arrow 13.0.0 or above to run, due to a bugfix in this version"
   },
   {
     "objectID": "materials/2_data_manipulation_1.html#using-functions-inside-verbs-1",
@@ -769,6 +846,13 @@
     "section": "Morning vs afternoon - without namespacing",
     "text": "Morning vs afternoon - without namespacing\n\nlibrary(lubridate)\n\nnyc_taxi |&gt;\n  group_by(time_of_day = ifelse(am(pickup_datetime), \"morning\", \"afternoon\")) |&gt;\n  count() |&gt;\n  collect()\n\n# A tibble: 2 × 2\n# Groups:   time_of_day [2]\n  time_of_day         n\n  &lt;chr&gt;           &lt;int&gt;\n1 afternoon   736491676\n2 morning     413860990"
   },
+  {
+    "objectID": "materials/2_data_manipulation_1.html#what-if-a-function-isnt-implemented",
+    "href": "materials/2_data_manipulation_1.html#what-if-a-function-isnt-implemented",
+    "title": "Big Data in R with Arrow",
+    "section": "What if a function isn’t implemented?",
+    "text": "What if a function isn’t implemented?\n\nnyc_taxi |&gt;\n  mutate(vendor_name = na_if(vendor_name, \"CMT\")) |&gt;\n  head() |&gt;\n  collect()\n\nError: Expression na_if(vendor_name, \"CMT\") not supported in Arrow\nCall collect() first to pull data into R."
+  },
   {
     "objectID": "materials/2_data_manipulation_1.html#head-to-the-docs-again-to-see-whats-implemented",
     "href": "materials/2_data_manipulation_1.html#head-to-the-docs-again-to-see-whats-implemented",
@@ -776,6 +860,20 @@
     "section": "Head to the docs again to see what’s implemented!",
     "text": "Head to the docs again to see what’s implemented!\n\n?`arrow-dplyr`\n\nor view them at https://arrow.apache.org/docs/r/reference/acero.html"
   },
+  {
+    "objectID": "materials/2_data_manipulation_1.html#option-1---find-a-workaround",
+    "href": "materials/2_data_manipulation_1.html#option-1---find-a-workaround",
+    "title": "Big Data in R with Arrow",
+    "section": "Option 1 - find a workaround!",
+    "text": "Option 1 - find a workaround!\n\nnyc_taxi |&gt;\n  mutate(vendor_name = ifelse(vendor_name == \"CMT\", NA, vendor_name)) |&gt;\n  head() |&gt;\n  collect()\n\n# A tibble: 6 × 24\n  vendor_name pickup_datetime     dropoff_datetime    passenger_count\n  &lt;chr&gt;       &lt;dttm&gt;              &lt;dttm&gt;                        &lt;int&gt;\n1 &lt;NA&gt;        2012-01-20 14:09:36 2012-01-20 14:42:25               1\n2 &lt;NA&gt;        2012-01-20 14:54:10 2012-01-20 15:06:55               1\n3 &lt;NA&gt;        2012-01-20 08:08:01 2012-01-20 08:11:02               1\n4 &lt;NA&gt;        2012-01-20 08:36:22 2012-01-20 08:39:44               1\n5 &lt;NA&gt;        2012-01-20 20:58:32 2012-01-20 21:03:04               1\n6 &lt;NA&gt;        2012-01-20 19:40:20 2012-01-20 19:43:43               2\n# ℹ 20 more variables: trip_distance &lt;dbl&gt;, pickup_longitude &lt;dbl&gt;,\n#   pickup_latitude &lt;dbl&gt;, rate_code &lt;chr&gt;, store_and_fwd &lt;chr&gt;,\n#   dropoff_longitude &lt;dbl&gt;, dropoff_latitude &lt;dbl&gt;, payment_type &lt;chr&gt;,\n#   fare_amount &lt;dbl&gt;, extra &lt;dbl&gt;, mta_tax &lt;dbl&gt;, tip_amount &lt;dbl&gt;,\n#   tolls_amount &lt;dbl&gt;, total_amount &lt;dbl&gt;, improvement_surcharge &lt;dbl&gt;,\n#   congestion_surcharge &lt;dbl&gt;, pickup_location_id &lt;int&gt;,\n#   dropoff_location_id &lt;int&gt;, year &lt;int&gt;, month &lt;int&gt;"
+  },
+  {
+    "objectID": "materials/2_data_manipulation_1.html#option-2",
+    "href": "materials/2_data_manipulation_1.html#option-2",
+    "title": "Big Data in R with Arrow",
+    "section": "Option 2",
+    "text": "Option 2\n\nIn data manipulation part 2!"
+  },
   {
     "objectID": "materials/2_data_manipulation_1.html#your-turn-1",
     "href": "materials/2_data_manipulation_1.html#your-turn-1",
@@ -784,11 +882,11 @@
     "text": "Your Turn\n\nUse the dplyr::filter() and stringr::str_ends() functions to return a subset of the data which is a) from September 2020, and b) the value in vendor_name ends with the letter “S”.\nTry to use the stringr function str_replace_na() to replace any NA values in the vendor_name column with the string “No vendor” instead. What happens, and why?\nBonus question: see if you can find a different way of completing the task in question 2.\n\n➡️ Data Manipulation Part I Exercises Page"
   },
   {
-    "objectID": "materials/2_data_manipulation_1.html#summary",
-    "href": "materials/2_data_manipulation_1.html#summary",
+    "objectID": "materials/2_data_manipulation_1.html#summary-1",
+    "href": "materials/2_data_manipulation_1.html#summary-1",
     "title": "Big Data in R with Arrow",
     "section": "Summary",
-    "text": "Summary\n\nWorking with Arrow datasets allow you to manipulate data which is larger-than-memory\nYou can use many dplyr functions with arrow - run ?\\arrow-dplyr`` to view the docs\nYou can pass data to duckdb to use functions implemented in dbplyr and duckdb but not arrow\n\n\n\n\n🔗 posit.io/arrow"
+    "text": "Summary\n\nWorking with Arrow datasets allow you to manipulate data which is larger-than-memory\nYou can use many dplyr functions with arrow - run ?\\arrow-dplyr`` to view the docs\nYou can pass data to duckdb to use functions implemented in dbplyr and duckdb but not arrow\nSometimes the easiest solution is an alternative path\n\n\n\n\n🔗 posit.io/arrow"
   },
   {
     "objectID": "materials/1_hello_arrow.html#poll-arrow",
@@ -1012,7 +1110,7 @@
     "href": "materials/4_data_manipulation_2-exercises.html",
     "title": "Data Manipulation Part 2 - Exercises",
     "section": "",
-    "text": "library(arrow)\nlibrary(dplyr)\nlibrary(duckdb)\n\n\nnyc_taxi &lt;- open_dataset(here::here(\"data/nyc-taxi\"))\nnyc_taxi\n\nFileSystemDataset with 120 Parquet files\nvendor_name: string\npickup_datetime: timestamp[ms]\ndropoff_datetime: timestamp[ms]\npassenger_count: int64\ntrip_distance: double\npickup_longitude: double\npickup_latitude: double\nrate_code: string\nstore_and_fwd: string\ndropoff_longitude: double\ndropoff_latitude: double\npayment_type: string\nfare_amount: double\nextra: double\nmta_tax: double\ntip_amount: double\ntolls_amount: double\ntotal_amount: double\nimprovement_surcharge: double\ncongestion_surcharge: double\npickup_location_id: int64\ndropoff_location_id: int64\nyear: int32\nmonth: int32\n\n\n\n\n\n\n\n\nJoins\n\n\n\n\nProblemSolution 1\n\n\n\nHow many taxi pickups were recorded in 2019 from the three major airports covered by the NYC Taxis data set (JFK, LaGuardia, Newark)?\n\n\n\n\npickup_location &lt;- read_csv_arrow(here::here(\"data/taxi_zone_lookup.csv\"))\n\npickup_location &lt;- pickup_location |&gt;\n  select(\n    pickup_location_id = LocationID,\n    borough = Borough,\n    pickup_zone = Zone\n  ) |&gt;\n  arrow_table(schema = schema(\n    pickup_location_id = int64(),\n    borough = utf8(),\n    pickup_zone = utf8()\n  ))\n\nnyc_taxi |&gt;\n  filter(year == 2019) |&gt;\n  left_join(pickup_location) |&gt;\n  filter(str_detect(pickup_zone, \"Airport\")) |&gt;\n  count(pickup_zone) |&gt;\n  collect()\n\n# A tibble: 3 × 2\n  pickup_zone             n\n  &lt;chr&gt;               &lt;int&gt;\n1 LaGuardia Airport 2159224\n2 JFK Airport       2729336\n3 Newark Airport       8643\n\n\n\n\n\n\n\n\n\n\n\n\n\nWindow functions\n\n\n\n\nProblemSolution 1\n\n\n\nHow many trips in September 2019 had a longer than average distance for that month?\n\n\n\n\nOption 1 - via DuckDB\n\nnyc_taxi |&gt;\n  filter(year == 2019, month == 9) |&gt;\n  to_duckdb() |&gt;\n  mutate(mean_distance = mean(trip_distance)) |&gt;\n  to_arrow() 
|&gt;\n  filter(trip_distance &lt; mean_distance) |&gt;\n  count() |&gt;\n  collect()\n\n# A tibble: 1 × 1\n        n\n    &lt;int&gt;\n1 4881580\n\n\n\n\nOption 2 - via a join\n\nnyc_taxi |&gt;\n  filter(year == 2019, month == 9) |&gt;\n  left_join(\n    nyc_taxi |&gt;\n      filter(year == 2019, month == 9) |&gt;\n      group_by(year) |&gt;\n      summarise(mean_distance = mean(trip_distance))\n    ) |&gt;\n  filter(trip_distance &lt; mean_distance) |&gt;\n  count() |&gt;\n  collect()\n\n# A tibble: 1 × 1\n        n\n    &lt;int&gt;\n1 4881580"
+    "text": "library(arrow)\nlibrary(dplyr)\nlibrary(duckdb)\n\n\nnyc_taxi &lt;- open_dataset(here::here(\"data/nyc-taxi\"))\nnyc_taxi\n\nFileSystemDataset with 120 Parquet files\nvendor_name: string\npickup_datetime: timestamp[ms]\ndropoff_datetime: timestamp[ms]\npassenger_count: int64\ntrip_distance: double\npickup_longitude: double\npickup_latitude: double\nrate_code: string\nstore_and_fwd: string\ndropoff_longitude: double\ndropoff_latitude: double\npayment_type: string\nfare_amount: double\nextra: double\nmta_tax: double\ntip_amount: double\ntolls_amount: double\ntotal_amount: double\nimprovement_surcharge: double\ncongestion_surcharge: double\npickup_location_id: int64\ndropoff_location_id: int64\nyear: int32\nmonth: int32\n\n\n\n\n\n\n\n\nUser-defined functions\n\n\n\n\nProblemSolution 1\n\n\n\nWrite a user-defined function which wraps the stringr function str_replace_na(), and use it to replace any NA values in the vendor_name column with the string “No vendor” instead.\n\n\n\n\n# Preview the distinct vendor names before we start\nnyc_taxi |&gt;\n  filter(year == 2019) |&gt; # smaller subset of the data\n  distinct(vendor_name) |&gt;\n  collect()\n\n# A tibble: 3 × 1\n  vendor_name\n  &lt;chr&gt;      \n1 CMT        \n2 VTS        \n3 &lt;NA&gt;       \n\n\n\nregister_scalar_function(\n  name = \"replace_vendor_na\",\n  function(context, string) {\n    stringr::str_replace_na(string, \"No vendor\")\n  },\n  in_type = schema(string = string()),\n  out_type = string(),\n  auto_convert = TRUE\n)\n\nvendor_names_fixed &lt;- nyc_taxi |&gt;\n  mutate(vendor_name = replace_vendor_na(vendor_name)) \n\n# Preview the distinct vendor names to check it's worked\nvendor_names_fixed |&gt;\n  filter(year == 2019) |&gt; # smaller subset of the data\n  distinct(vendor_name) |&gt;\n  collect()\n\n# A tibble: 3 × 1\n  vendor_name\n  &lt;chr&gt;      \n1 CMT        \n2 VTS        \n3 No vendor  \n\n\n\n\n\n\n\n\n\n\n\n\n\nJoins\n\n\n\n\nProblemSolution 1\n\n\n\nHow many taxi 
pickups were recorded in 2019 from the three major airports covered by the NYC Taxis data set (JFK, LaGuardia, Newark)?\n\n\n\n\npickup_location &lt;- read_csv_arrow(here::here(\"data/taxi_zone_lookup.csv\"))\n\npickup_location &lt;- pickup_location |&gt;\n  select(\n    pickup_location_id = LocationID,\n    borough = Borough,\n    pickup_zone = Zone\n  ) |&gt;\n  arrow_table(schema = schema(\n    pickup_location_id = int64(),\n    borough = utf8(),\n    pickup_zone = utf8()\n  ))\n\nnyc_taxi |&gt;\n  filter(year == 2019) |&gt;\n  left_join(pickup_location) |&gt;\n  filter(str_detect(pickup_zone, \"Airport\")) |&gt;\n  count(pickup_zone) |&gt;\n  collect()\n\n# A tibble: 3 × 2\n  pickup_zone             n\n  &lt;chr&gt;               &lt;int&gt;\n1 JFK Airport       2729336\n2 LaGuardia Airport 2159224\n3 Newark Airport       8643\n\n\n\n\n\n\n\n\n\n\n\n\n\nWindow functions\n\n\n\n\nProblemSolution 1\n\n\n\nHow many trips in September 2019 had a longer than average distance for that month?\n\n\n\n\nOption 1 - via DuckDB\n\nnyc_taxi |&gt;\n  filter(year == 2019, month == 9) |&gt;\n  to_duckdb() |&gt;\n  mutate(mean_distance = mean(trip_distance)) |&gt;\n  to_arrow() |&gt;\n  filter(trip_distance &lt; mean_distance) |&gt;\n  count() |&gt;\n  collect()\n\n# A tibble: 1 × 1\n        n\n    &lt;int&gt;\n1 4881580\n\n\n\n\nOption 2 - via a join\n\nnyc_taxi |&gt;\n  filter(year == 2019, month == 9) |&gt;\n  left_join(\n    nyc_taxi |&gt;\n      filter(year == 2019, month == 9) |&gt;\n      group_by(year) |&gt;\n      summarise(mean_distance = mean(trip_distance))\n    ) |&gt;\n  filter(trip_distance &lt; mean_distance) |&gt;\n  count() |&gt;\n  collect()\n\n# A tibble: 1 × 1\n        n\n    &lt;int&gt;\n1 4881580"
   },
   {
     "objectID": "materials/5_arrow_single_file-exercises.html",
diff --git a/sitemap.xml b/sitemap.xml
index fa25764..3df2543 100644
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -2,66 +2,66 @@
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/setup.html</loc>
-    <lastmod>2023-09-05T17:13:28.973Z</lastmod>
+    <lastmod>2023-09-06T11:10:57.739Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/7_continue_learning.html</loc>
-    <lastmod>2023-09-05T17:13:27.789Z</lastmod>
+    <lastmod>2023-09-06T11:10:56.359Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/5_arrow_single_file.html</loc>
-    <lastmod>2023-09-05T17:13:26.937Z</lastmod>
+    <lastmod>2023-09-06T11:10:55.411Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/4_data_manipulation_2.html</loc>
-    <lastmod>2023-09-05T17:13:22.689Z</lastmod>
+    <lastmod>2023-09-06T11:10:51.062Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/3_data_engineering.html</loc>
-    <lastmod>2023-09-05T17:13:21.401Z</lastmod>
+    <lastmod>2023-09-06T11:10:49.478Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/2_data_manipulation_1.html</loc>
-    <lastmod>2023-09-05T17:13:19.729Z</lastmod>
+    <lastmod>2023-09-06T11:10:47.634Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/1_hello_arrow.html</loc>
-    <lastmod>2023-09-05T17:13:18.289Z</lastmod>
+    <lastmod>2023-09-06T11:10:45.930Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/0_housekeeping.html</loc>
-    <lastmod>2023-09-05T17:13:16.593Z</lastmod>
+    <lastmod>2023-09-06T11:10:44.026Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/index.html</loc>
-    <lastmod>2023-09-05T17:13:15.101Z</lastmod>
+    <lastmod>2023-09-06T11:10:42.438Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/1_hello_arrow-exercises.html</loc>
-    <lastmod>2023-09-05T17:13:17.605Z</lastmod>
+    <lastmod>2023-09-06T11:10:45.170Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/2_data_manipulation_1-exercises.html</loc>
-    <lastmod>2023-09-05T17:13:18.981Z</lastmod>
+    <lastmod>2023-09-06T11:10:46.718Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/3_data_engineering-exercises.html</loc>
-    <lastmod>2023-09-05T17:13:20.525Z</lastmod>
+    <lastmod>2023-09-06T11:10:48.534Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/4_data_manipulation_2-exercises.html</loc>
-    <lastmod>2023-09-05T17:13:22.065Z</lastmod>
+    <lastmod>2023-09-06T11:10:50.262Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/5_arrow_single_file-exercises.html</loc>
-    <lastmod>2023-09-05T17:13:26.281Z</lastmod>
+    <lastmod>2023-09-06T11:10:54.699Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/6_wrapping_up.html</loc>
-    <lastmod>2023-09-05T17:13:27.493Z</lastmod>
+    <lastmod>2023-09-06T11:10:56.035Z</lastmod>
   </url>
   <url>
     <loc>https://github.com/posit-conf-2023/arrow/materials/8_closing.html</loc>
-    <lastmod>2023-09-05T17:13:28.109Z</lastmod>
+    <lastmod>2023-09-06T11:10:56.707Z</lastmod>
   </url>
 </urlset>