1. crul introduction

Scott Chamberlain

2020-07-10

crul is an HTTP client for R.

Install

Stable CRAN version

install.packages("crul")

Dev version

remotes::install_github("ropensci/crul")
library("crul")

HttpClient - the main interface

HttpClient is where to start

(x <- HttpClient$new(
  url = "https://httpbin.org",
  opts = list(
    timeout = 1
  ),
  headers = list(
    a = "hello world"
  )
))
#> <crul connection> 
#>   url: https://httpbin.org
#>   curl options: 
#>     timeout: 1
#>   proxies: 
#>   auth: 
#>   headers: 
#>     a: hello world
#>   progress: FALSE
#>   hooks:

This makes an R6 object that has all the bits and bobs you’d expect for doing HTTP requests. When it prints, it shows any defaults you’ve set. As you update the object, you can see what’s been set:

x$opts
#> $timeout
#> [1] 1
x$headers
#> $a
#> [1] "hello world"

Do some HTTP requests

The client object created above has HTTP methods that you can call, passing paths to them as well as query parameters, body values, and any other curl options.

Here, we’ll do a GET request on the route /get on our base URL https://httpbin.org (the full URL is then https://httpbin.org/get):

res <- x$get("get")

The response from an HTTP request is another R6 class, HttpResponse, which has slots for the outputs of the request, and some functions to deal with the response:

Status code

res$status_code
#> [1] 200

The content

res$content
#>   [1] 7b 0a 20 20 22 61 72 67 73 22 3a 20 7b 7d 2c 20 0a 20 20 22 68 65 61 64 65
#>  [26] 72 73 22 3a 20 7b 0a 20 20 20 20 22 41 22 3a 20 22 68 65 6c 6c 6f 20 77 6f
#>  [51] 72 6c 64 22 2c 20 0a 20 20 20 20 22 41 63 63 65 70 74 22 3a 20 22 61 70 70
#>  [76] 6c 69 63 61 74 69 6f 6e 2f 6a 73 6f 6e 2c 20 74 65 78 74 2f 78 6d 6c 2c 20
#> [101] 61 70 70 6c 69 63 61 74 69 6f 6e 2f 78 6d 6c 2c 20 2a 2f 2a 22 2c 20 0a 20
#> [126] 20 20 20 22 41 63 63 65 70 74 2d 45 6e 63 6f 64 69 6e 67 22 3a 20 22 67 7a
#> [151] 69 70 2c 20 64 65 66 6c 61 74 65 22 2c 20 0a 20 20 20 20 22 48 6f 73 74 22
#> [176] 3a 20 22 68 74 74 70 62 69 6e 2e 6f 72 67 22 2c 20 0a 20 20 20 20 22 55 73
#> [201] 65 72 2d 41 67 65 6e 74 22 3a 20 22 6c 69 62 63 75 72 6c 2f 37 2e 36 34 2e
#> [226] 31 20 72 2d 63 75 72 6c 2f 34 2e 33 20 63 72 75 6c 2f 30 2e 39 2e 34 2e 39
#> [251] 31 22 2c 20 0a 20 20 20 20 22 58 2d 41 6d 7a 6e 2d 54 72 61 63 65 2d 49 64
#> [276] 22 3a 20 22 52 6f 6f 74 3d 31 2d 35 66 30 38 64 36 63 65 2d 61 61 32 30 39
#> [301] 37 30 64 63 62 63 31 33 64 30 61 37 65 38 66 32 35 65 36 22 0a 20 20 7d 2c
#> [326] 20 0a 20 20 22 6f 72 69 67 69 6e 22 3a 20 22 32 34 2e 32 31 2e 32 32 39 2e
#> [351] 35 39 22 2c 20 0a 20 20 22 75 72 6c 22 3a 20 22 68 74 74 70 73 3a 2f 2f 68
#> [376] 74 74 70 62 69 6e 2e 6f 72 67 2f 67 65 74 22 0a 7d 0a

HTTP method

res$method
#> [1] "get"

Request headers

res$request_headers
#> $`User-Agent`
#> [1] "libcurl/7.64.1 r-curl/4.3 crul/0.9.4.91"
#> 
#> $`Accept-Encoding`
#> [1] "gzip, deflate"
#> 
#> $Accept
#> [1] "application/json, text/xml, application/xml, */*"
#> 
#> $a
#> [1] "hello world"

Response headers

res$response_headers
#> $status
#> [1] "HTTP/2 200 "
#> 
#> $date
#> [1] "Fri, 10 Jul 2020 20:59:58 GMT"
#> 
#> $`content-type`
#> [1] "application/json"
#> 
#> $`content-length`
#> [1] "393"
#> 
#> $server
#> [1] "gunicorn/19.9.0"
#> 
#> $`access-control-allow-origin`
#> [1] "*"
#> 
#> $`access-control-allow-credentials`
#> [1] "true"

All response headers, including intermediate headers, if any

res$response_headers_all
#> [[1]]
#> [[1]]$status
#> [1] "HTTP/2 200 "
#> 
#> [[1]]$date
#> [1] "Fri, 10 Jul 2020 20:59:58 GMT"
#> 
#> [[1]]$`content-type`
#> [1] "application/json"
#> 
#> [[1]]$`content-length`
#> [1] "393"
#> 
#> [[1]]$server
#> [1] "gunicorn/19.9.0"
#> 
#> [[1]]$`access-control-allow-origin`
#> [1] "*"
#> 
#> [[1]]$`access-control-allow-credentials`
#> [1] "true"

And you can parse the content with a provided function:

res$parse()
#> [1] "{\n  \"args\": {}, \n  \"headers\": {\n    \"A\": \"hello world\", \n    \"Accept\": \"application/json, text/xml, application/xml, */*\", \n    \"Accept-Encoding\": \"gzip, deflate\", \n    \"Host\": \"httpbin.org\", \n    \"User-Agent\": \"libcurl/7.64.1 r-curl/4.3 crul/0.9.4.91\", \n    \"X-Amzn-Trace-Id\": \"Root=1-5f08d6ce-aa20970dcbc13d0a7e8f25e6\"\n  }, \n  \"origin\": \"24.21.229.59\", \n  \"url\": \"https://httpbin.org/get\"\n}\n"
jsonlite::fromJSON(res$parse())
#> $args
#> named list()
#> 
#> $headers
#> $headers$A
#> [1] "hello world"
#> 
#> $headers$Accept
#> [1] "application/json, text/xml, application/xml, */*"
#> 
#> $headers$`Accept-Encoding`
#> [1] "gzip, deflate"
#> 
#> $headers$Host
#> [1] "httpbin.org"
#> 
#> $headers$`User-Agent`
#> [1] "libcurl/7.64.1 r-curl/4.3 crul/0.9.4.91"
#> 
#> $headers$`X-Amzn-Trace-Id`
#> [1] "Root=1-5f08d6ce-aa20970dcbc13d0a7e8f25e6"
#> 
#> 
#> $origin
#> [1] "24.21.229.59"
#> 
#> $url
#> [1] "https://httpbin.org/get"

With the HttpClient object, which holds any configuration stuff we set, we can make other HTTP verb requests. For example, a POST request:

x$post(
  path = "post", 
  body = list(hello = "world")
)

Write to disk

x <- HttpClient$new(url = "https://httpbin.org")
f <- tempfile()
res <- x$get(disk = f)
# when using write to disk, content is a path
res$content 
#> [1] "/var/folders/fc/n7g_vrvn0sx_st0p8lxb3ts40000gn/T//RtmppSPqIf/file20b5651a4a3a"

Read lines

readLines(res$content, n = 10)
#>  [1] "<!DOCTYPE html>"                                                                                                               
#>  [2] "<html lang=\"en\">"                                                                                                            
#>  [3] ""                                                                                                                              
#>  [4] "<head>"                                                                                                                        
#>  [5] "    <meta charset=\"UTF-8\">"                                                                                                  
#>  [6] "    <title>httpbin.org</title>"                                                                                                
#>  [7] "    <link href=\"https://fonts.googleapis.com/css?family=Open+Sans:400,700|Source+Code+Pro:300,600|Titillium+Web:400,600,700\""
#>  [8] "        rel=\"stylesheet\">"                                                                                                   
#>  [9] "    <link rel=\"stylesheet\" type=\"text/css\" href=\"/flasgger_static/swagger-ui.css\">"                                      
#> [10] "    <link rel=\"icon\" type=\"image/png\" href=\"/static/favicon.ico\" sizes=\"64x64 32x32 16x16\" />"

Stream data

(x <- HttpClient$new(url = "https://httpbin.org"))
#> <crul connection> 
#>   url: https://httpbin.org
#>   curl options: 
#>   proxies: 
#>   auth: 
#>   headers: 
#>   progress: FALSE
#>   hooks:
res <- x$get('stream/5', stream = function(x) cat(rawToChar(x)))
#> {"url": "https://httpbin.org/stream/5", "args": {}, "headers": {"Host": "httpbin.org", "X-Amzn-Trace-Id": "Root=1-5f08d6cf-871364ce53d065c8d32eefd4", "User-Agent": "libcurl/7.64.1 r-curl/4.3 crul/0.9.4.91", "Accept-Encoding": "gzip, deflate", "Accept": "application/json, text/xml, application/xml, */*"}, "origin": "24.21.229.59", "id": 0}
#> {"url": "https://httpbin.org/stream/5", "args": {}, "headers": {"Host": "httpbin.org", "X-Amzn-Trace-Id": "Root=1-5f08d6cf-871364ce53d065c8d32eefd4", "User-Agent": "libcurl/7.64.1 r-curl/4.3 crul/0.9.4.91", "Accept-Encoding": "gzip, deflate", "Accept": "application/json, text/xml, application/xml, */*"}, "origin": "24.21.229.59", "id": 1}
#> {"url": "https://httpbin.org/stream/5", "args": {}, "headers": {"Host": "httpbin.org", "X-Amzn-Trace-Id": "Root=1-5f08d6cf-871364ce53d065c8d32eefd4", "User-Agent": "libcurl/7.64.1 r-curl/4.3 crul/0.9.4.91", "Accept-Encoding": "gzip, deflate", "Accept": "application/json, text/xml, application/xml, */*"}, "origin": "24.21.229.59", "id": 2}
#> {"url": "https://httpbin.org/stream/5", "args": {}, "headers": {"Host": "httpbin.org", "X-Amzn-Trace-Id": "Root=1-5f08d6cf-871364ce53d065c8d32eefd4", "User-Agent": "libcurl/7.64.1 r-curl/4.3 crul/0.9.4.91", "Accept-Encoding": "gzip, deflate", "Accept": "application/json, text/xml, application/xml, */*"}, "origin": "24.21.229.59", "id": 3}
#> {"url": "https://httpbin.org/stream/5", "args": {}, "headers": {"Host": "httpbin.org", "X-Amzn-Trace-Id": "Root=1-5f08d6cf-871364ce53d065c8d32eefd4", "User-Agent": "libcurl/7.64.1 r-curl/4.3 crul/0.9.4.91", "Accept-Encoding": "gzip, deflate", "Accept": "application/json, text/xml, application/xml, */*"}, "origin": "24.21.229.59", "id": 4}
# when streaming, content is NULL
res$content 
#> NULL

Learn more

Learn more with the other vignettes: