3. async with crul

Scott Chamberlain

2020-07-09

Asynchronous requests with crul.

There are two interfaces to asynchronous requests in crul:

  1. Simple async: any number of URLs, all treated with the same curl options, headers, etc., and only one HTTP method type at a time.
  2. Varied request async: build any type of request and execute all asynchronously.

The first option takes less thinking, less work, and is a good solution when you just want to hit a bunch of URLs asynchronously.

The second option is ideal when you want to set curl options/headers on each request and/or want to use a different HTTP method for each request.

One thing to think about before using async is whether the data provider is okay with it. It’s possible that a data provider’s service may be brought down if you make too many async requests.

library("crul")

simple async

Build request object with 1 or more URLs

(cc <- Async$new(
  urls = c(
    'https://httpbin.org/get?a=5',
    'https://httpbin.org/get?a=5&b=6',
    'https://httpbin.org/ip'
  )
))
#> <crul async connection> 
#>   curl options: 
#>   proxies: 
#>   auth: 
#>   headers: 
#>   urls: (n: 3)
#>    https://httpbin.org/get?a=5
#>    https://httpbin.org/get?a=5&b=6
#>    https://httpbin.org/ip

Make request with any HTTP method

(res <- cc$get())
#> [[1]]
#> <crul response> 
#>   url: https://httpbin.org/get?a=5
#>   request_headers: 
#>   response_headers: 
#>     status: HTTP/2 200 
#>     date: Thu, 09 Jul 2020 20:14:45 GMT
#>     content-type: application/json
#>     content-length: 400
#>     server: gunicorn/19.9.0
#>     access-control-allow-origin: *
#>     access-control-allow-credentials: true
#>   params: 
#>     a: 5
#>   status: 200
#> 
#> [[2]]
#> <crul response> 
#>   url: https://httpbin.org/get?a=5&b=6
#>   request_headers: 
#>   response_headers: 
#>     status: HTTP/2 200 
#>     date: Thu, 09 Jul 2020 20:14:45 GMT
#>     content-type: application/json
#>     content-length: 419
#>     server: gunicorn/19.9.0
#>     access-control-allow-origin: *
#>     access-control-allow-credentials: true
#>   params: 
#>     a: 5
#>     b: 6
#>   status: 200
#> 
#> [[3]]
#> <crul response> 
#>   url: https://httpbin.org/ip
#>   request_headers: 
#>   response_headers: 
#>     status: HTTP/2 200 
#>     date: Thu, 09 Jul 2020 20:14:45 GMT
#>     content-type: application/json
#>     content-length: 31
#>     server: gunicorn/19.9.0
#>     access-control-allow-origin: *
#>     access-control-allow-credentials: true
#>   status: 200

You get back a list whose length matches the number of input URLs.

Access object variables and methods just as with HttpClient results, here for one result at a time.

res[[1]]$url
#> [1] "https://httpbin.org/get?a=5"
res[[1]]$success()
#> [1] TRUE
res[[1]]$parse("UTF-8")
#> [1] "{\n  \"args\": {\n    \"a\": \"5\"\n  }, \n  \"headers\": {\n    \"Accept\": \"application/json, text/xml, application/xml, */*\", \n    \"Accept-Encoding\": \"gzip, deflate\", \n    \"Host\": \"httpbin.org\", \n    \"User-Agent\": \"R (4.0.2 x86_64-apple-darwin17.0 x86_64 darwin17.0)\", \n    \"X-Amzn-Trace-Id\": \"Root=1-5f077ab5-686fd087166386a10fe80f25\"\n  }, \n  \"origin\": \"24.21.229.59\", \n  \"url\": \"https://httpbin.org/get?a=5\"\n}\n"

Or apply access/method calls across many results, e.g., parse all results

lapply(res, function(z) z$parse("UTF-8"))
#> [[1]]
#> [1] "{\n  \"args\": {\n    \"a\": \"5\"\n  }, \n  \"headers\": {\n    \"Accept\": \"application/json, text/xml, application/xml, */*\", \n    \"Accept-Encoding\": \"gzip, deflate\", \n    \"Host\": \"httpbin.org\", \n    \"User-Agent\": \"R (4.0.2 x86_64-apple-darwin17.0 x86_64 darwin17.0)\", \n    \"X-Amzn-Trace-Id\": \"Root=1-5f077ab5-686fd087166386a10fe80f25\"\n  }, \n  \"origin\": \"24.21.229.59\", \n  \"url\": \"https://httpbin.org/get?a=5\"\n}\n"
#> 
#> [[2]]
#> [1] "{\n  \"args\": {\n    \"a\": \"5\", \n    \"b\": \"6\"\n  }, \n  \"headers\": {\n    \"Accept\": \"application/json, text/xml, application/xml, */*\", \n    \"Accept-Encoding\": \"gzip, deflate\", \n    \"Host\": \"httpbin.org\", \n    \"User-Agent\": \"R (4.0.2 x86_64-apple-darwin17.0 x86_64 darwin17.0)\", \n    \"X-Amzn-Trace-Id\": \"Root=1-5f077ab5-0c5caed1b0cac8d405458e4e\"\n  }, \n  \"origin\": \"24.21.229.59\", \n  \"url\": \"https://httpbin.org/get?a=5&b=6\"\n}\n"
#> 
#> [[3]]
#> [1] "{\n  \"origin\": \"24.21.229.59\"\n}\n"

varied request async

req1 <- HttpRequest$new(
  url = "https://httpbin.org/get?a=5",
  opts = list(
    verbose = TRUE
  )
)
req1$get()
#> <crul http request> get
#>   url: https://httpbin.org/get?a=5
#>   curl options: 
#>     verbose: TRUE
#>   proxies: 
#>   auth: 
#>   headers: 
#>   progress: FALSE

req2 <- HttpRequest$new(
  url = "https://httpbin.org/post?a=5&b=6"
)
req2$post(body = list(a = 5))
#> <crul http request> post
#>   url: https://httpbin.org/post?a=5&b=6
#>   curl options: 
#>   proxies: 
#>   auth: 
#>   headers: 
#>   progress: FALSE

(res <- AsyncVaried$new(req1, req2))
#> <crul async varied connection>
#>   requests: (n: 2)
#>    get: https://httpbin.org/get?a=5 
#>    post: https://httpbin.org/post?a=5&b=6

Make requests asynchronously

res$request()

Parse all results

res$parse()
#> [1] "{\n  \"args\": {\n    \"a\": \"5\"\n  }, \n  \"headers\": {\n    \"Accept\": \"application/json, text/xml, application/xml, */*\", \n    \"Accept-Encoding\": \"gzip, deflate\", \n    \"Host\": \"httpbin.org\", \n    \"User-Agent\": \"R (4.0.2 x86_64-apple-darwin17.0 x86_64 darwin17.0)\", \n    \"X-Amzn-Trace-Id\": \"Root=1-5f077ab6-74ff987c9d00dd500c8ad920\"\n  }, \n  \"origin\": \"24.21.229.59\", \n  \"url\": \"https://httpbin.org/get?a=5\"\n}\n"                                                                                                                                                                                                                                                       
#> [2] "{\n  \"args\": {\n    \"a\": \"5\", \n    \"b\": \"6\"\n  }, \n  \"data\": \"\", \n  \"files\": {}, \n  \"form\": {\n    \"a\": \"5\"\n  }, \n  \"headers\": {\n    \"Accept\": \"application/json, text/xml, application/xml, */*\", \n    \"Accept-Encoding\": \"gzip, deflate\", \n    \"Content-Length\": \"137\", \n    \"Content-Type\": \"multipart/form-data; boundary=------------------------cb67be38c1a8a084\", \n    \"Host\": \"httpbin.org\", \n    \"User-Agent\": \"libcurl/7.64.1 r-curl/4.3 crul/0.9.2.93\", \n    \"X-Amzn-Trace-Id\": \"Root=1-5f077ab6-d206c2e86f429778e6cb9810\"\n  }, \n  \"json\": null, \n  \"origin\": \"24.21.229.59\", \n  \"url\": \"https://httpbin.org/post?a=5&b=6\"\n}\n"
lapply(res$parse(), jsonlite::prettify)
#> [[1]]
#> {
#>     "args": {
#>         "a": "5"
#>     },
#>     "headers": {
#>         "Accept": "application/json, text/xml, application/xml, */*",
#>         "Accept-Encoding": "gzip, deflate",
#>         "Host": "httpbin.org",
#>         "User-Agent": "R (4.0.2 x86_64-apple-darwin17.0 x86_64 darwin17.0)",
#>         "X-Amzn-Trace-Id": "Root=1-5f077ab6-74ff987c9d00dd500c8ad920"
#>     },
#>     "origin": "24.21.229.59",
#>     "url": "https://httpbin.org/get?a=5"
#> }
#>  
#> 
#> [[2]]
#> {
#>     "args": {
#>         "a": "5",
#>         "b": "6"
#>     },
#>     "data": "",
#>     "files": {
#> 
#>     },
#>     "form": {
#>         "a": "5"
#>     },
#>     "headers": {
#>         "Accept": "application/json, text/xml, application/xml, */*",
#>         "Accept-Encoding": "gzip, deflate",
#>         "Content-Length": "137",
#>         "Content-Type": "multipart/form-data; boundary=------------------------cb67be38c1a8a084",
#>         "Host": "httpbin.org",
#>         "User-Agent": "libcurl/7.64.1 r-curl/4.3 crul/0.9.2.93",
#>         "X-Amzn-Trace-Id": "Root=1-5f077ab6-d206c2e86f429778e6cb9810"
#>     },
#>     "json": null,
#>     "origin": "24.21.229.59",
#>     "url": "https://httpbin.org/post?a=5&b=6"
#> }
#> 

Status codes

res$status_code()
#> [1] 200 200