Some endpoints return multiple items at once. In that case, `pagination.previous` and `pagination.next` cursor URLs are returned in the response when relevant. You can use these cursors to navigate in both directions through the result items. A count parameter is also exposed to define how many items to return.
Note that these cursors are only valid for around 2 minutes, so they are only suited for iterating through a large set of results programmatically.
To help you get started, here are some functions in your language of choice:
#!/usr/bin/env python3
import json
import requests
import urllib.parse
import time
def iterate(url, query=None, max_retries=5, timeout_seconds=10, retry_sleep_seconds=1, session=None):
    """
    Iterate over every item of a paginated resource, following ``pagination.next`` until the end.

    Pages are fetched lazily, one at a time, and each page is retried on transient failures
    (network errors, HTTP 429 and HTTP 5xx). Items of a page are only yielded once the page
    has been fetched and decoded successfully, so a retry never yields an item twice.

    Nothing is written to the standard output, so callers are free to use it for their own export.

    Args:
        url(str): The URL of the initial page of results to iterate on.
        query(dict): Helper argument to overwrite the query string portion of the url.
        max_retries(float): The maximum number of tries to do for each page. Must be greater than 0.
        timeout_seconds(float): The maximum time to wait for fetching each page.
        retry_sleep_seconds(float): The time to wait before retrying to fetch a page.
        session(requests.Session): An optional existing requests Session for connection pooling and keepalive.

    Yields:
        dict: Each item of the paginated resource, typically a parsed JSON object, given as a dictionary object.

    Raises:
        ValueError: If max_retries is not greater than 0 (raised when iteration starts).
        requests.exceptions.HTTPError: If any non retryable HTTP error is encountered.
        Exception: After the maximum number of tries is exhausted, the last encountered exception is re-raised.

    Example:
        for installation in iterate('https://management-api.wonderpush.com/v1/installations', query={'accessToken': YOUR_ACCESS_TOKEN, 'limit': 1000}):
            print(installation.get('id'))
    """
    if max_retries <= 0:
        raise ValueError('max_retries must be greater than 0')

    if session is None:
        # Create a requests session to allow for connection keepalive
        session = requests.Session()

    # Facilitate the construction of the URL query string
    if query is not None:
        url_parts = urllib.parse.urlparse(url)
        query_dict = urllib.parse.parse_qs(url_parts.query)
        query_dict.update(query)
        url_parts = url_parts._replace(query=urllib.parse.urlencode(query_dict, doseq=True))
        url = urllib.parse.urlunparse(url_parts)

    # Loop over pages of results until there is no next page
    while url:
        retries = 0
        while True:
            try:
                response = session.get(url, timeout=timeout_seconds)
                response.raise_for_status()
                data = response.json()
                break
            except requests.exceptions.HTTPError as e:
                # Only rate limiting and server-side errors are worth retrying
                status_code = e.response.status_code
                if status_code != 429 and status_code < 500:
                    raise
                last_exception = e
            except requests.exceptions.RequestException as e:
                last_exception = e
            # The exception name bound by `except ... as e` is cleared when the handler
            # exits, hence the explicit `last_exception` variable.
            retries += 1
            if retries >= max_retries:
                raise last_exception
            time.sleep(retry_sleep_seconds)

        # Yield outside of the retry block: the page is complete at this point
        yield from data['data']

        # A missing or null pagination.next marks the last page
        url = (data.get('pagination') or {}).get('next')
#
# EXAMPLE
#
# Placeholder: replace with your own access token before running this example.
YOUR_ACCESS_TOKEN = '…'
# Export all installations in a single JSON document per line on standard output
for item in iterate('https://management-api.wonderpush.com/v1/installations', query={'accessToken': YOUR_ACCESS_TOKEN, 'limit': 1000}):
    print(json.dumps(item))
#!/bin/sh
# Print the help text of this script on the standard output, one line per argument.
usage() {
    printf '%s\n' \
        "Usage: $0 url" \
        "Iterates over each result of the given paginated resource until its end," \
        "writing one item per line on the standard output." \
        "" \
        "Arguments:" \
        " url The URL of the paginated resource to iterate onto." \
        "" \
        "In case of error, outputs the received body on the standard error output" \
        "and exits with curl's exit code."
}
# Show the help when asked for it; otherwise require exactly one argument.
# Only the POSIX `=` operator is used: `==` is a bashism that makes `[` fail
# under strict /bin/sh implementations, and `-o` is marked obsolescent.
if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
    usage
    exit 0
elif [ "$#" -ne 1 ]; then
    usage >&2
    exit 1
fi
# Fetch the given paginated URL page after page, writing each item of the
# `data` array as one compact JSON line on the standard output.
# Exits the script with curl's (or jq's) exit code on failure.
iterate() {
    # Initial URL
    url="$1"
    # Iterate until we've reached the end
    while [ -n "$url" ]; do
        # Request one page; --fail-with-body keeps the error body for reporting
        response="$(curl -s --fail-with-body "$url")"
        # Handle curl errors
        exitcode=$?
        if [ "$exitcode" -ne 0 ]; then
            # Write the error on the standard error output and exit
            printf '%s' "$response" >&2
            exit "$exitcode"
        fi
        # Extract the next URL; a null or missing value yields "".
        # printf is used instead of echo because some /bin/sh implementations
        # make echo interpret backslash escapes, which would corrupt the JSON.
        url="$(printf '%s\n' "$response" | jq -r '.pagination.next // empty')" || exit $?
        # Write each item of the page in the standard output
        printf '%s\n' "$response" | jq -rc '.data[]' || exit $?
    done
}
iterate "$1"
#
# EXAMPLE
#
# Iterate over the installations endpoint, 1000 items per page.
# The access token is taken from the YOUR_ACCESS_TOKEN shell variable.
./iterate.sh "https://management-api.wonderpush.com/v1/installations?accessToken=${YOUR_ACCESS_TOKEN}&limit=1000"
#!/usr/bin/env php
<?php
/**
 * Sends an HTTPS GET request to a given URL, decodes the JSON response, yields each of the objects of the `data` array field,
 * and iterates on the next page of results by taking the URL inside the `pagination.next` field until it is `null`.
 *
 * Each page is attempted up to `max_retries` times; the attempt counter starts over for every page.
 * The cURL handle is released when the generator finishes, throws, or is destroyed before completion.
 *
 * @param string $url The URL to fetch data from.
 * @param array $options An optional array of options to configure the function:
 *   - query: An associative array to help you construct the desired URL to fetch.
 *   - timeout: The maximum number of seconds to wait for a response (default: 10).
 *   - max_retries: The maximum number of attempts for each page when it fails with a cURL error
 *     or a 429 or 5xx HTTP status code (default: 3).
 *   - retry_sleep_seconds: The number of seconds to wait before retrying a page (default: 1).
 * @return \Generator A generator that yields each item in the `data` array field of the JSON response.
 * @throws \Exception If the URL is invalid, the request fails or the response is not a valid JSON string with a `data` array field.
 */
function iterate($url, $options = []) {
    if (isset($options['query']) && is_array($options['query'])) {
        // Merge the existing query string with the new parameters
        $url_parts = parse_url($url);
        if ($url_parts === false || empty($url_parts['scheme']) || empty($url_parts['host'])) {
            throw new Exception("Invalid URL");
        }
        $query_params = [];
        if (!empty($url_parts['query'])) {
            parse_str($url_parts['query'], $query_params);
        }
        $query_params = array_merge($query_params, $options['query']);
        // Rebuild the query string
        $url_parts['query'] = http_build_query($query_params);
        // Reconstruct the URL with the new query string
        $url = sprintf('%s://%s%s%s%s',
            $url_parts['scheme'],
            $url_parts['host'],
            !empty($url_parts['port']) ? ':' . $url_parts['port'] : '',
            // parse_url() omits the `path` key when the URL has none
            !empty($url_parts['path']) ? $url_parts['path'] : '',
            !empty($url_parts['query']) ? '?' . $url_parts['query'] : ''
        );
    }

    $timeout = isset($options['timeout']) ? $options['timeout'] : 10;
    $max_retries = isset($options['max_retries']) ? $options['max_retries'] : 3;
    $retry_sleep_seconds = isset($options['retry_sleep_seconds']) ? $options['retry_sleep_seconds'] : 1;

    $ch = curl_init();
    try {
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TCP_KEEPALIVE, true);
        curl_setopt($ch, CURLOPT_TCP_KEEPIDLE, 60); // in seconds
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);

        while ($url !== null && $url !== '') {
            curl_setopt($ch, CURLOPT_URL, $url);

            // Fetch the current page, with a fresh attempt counter for each page
            $retries = 0;
            $fetched = false;
            $response = false;
            while (!$fetched && $retries < $max_retries) {
                $response = curl_exec($ch);
                $errno = curl_errno($ch);
                if ($errno != 0) {
                    $retries++;
                    $error = curl_error($ch);
                    error_log("API request failed with cURL errno {$errno}: {$error}, retrying ({$retries}/{$max_retries})");
                } else {
                    $http_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
                    if ($http_status === 429 || $http_status >= 500) {
                        $retries++;
                        error_log("API request failed with HTTP status {$http_status}, retrying ({$retries}/{$max_retries})");
                    } else if ($http_status !== 200) {
                        // Other statuses are not retryable
                        throw new Exception("HTTP error: " . $http_status);
                    } else {
                        $fetched = true;
                    }
                }
                if (!$fetched && $retries < $max_retries) {
                    // Give the server some time before the next attempt
                    usleep((int) ($retry_sleep_seconds * 1000000));
                }
            }
            if (!$fetched) {
                throw new Exception("API request failed after {$retries} retries");
            }

            $json = json_decode($response, true);
            if (!is_array($json)) {
                throw new Exception("Failed to decode JSON response");
            }
            if (!isset($json['data']) || !is_array($json['data'])) {
                throw new Exception("Missing or invalid 'data' field in JSON response");
            }

            foreach ($json['data'] as $item) {
                yield $item;
            }

            // isset() is false for both a missing and a null pagination.next
            $url = isset($json['pagination']['next']) ? $json['pagination']['next'] : null;
        }
    } finally {
        curl_close($ch);
    }
}
//
// Example
//
// Print every installation as one JSON document per line.
$YOUR_ACCESS_TOKEN = '…';
$installations = iterate(
    'https://management-api.wonderpush.com/v1/installations',
    ['query' => ['accessToken' => $YOUR_ACCESS_TOKEN, 'limit' => 1000]]
);
foreach ($installations as $item) {
    echo json_encode($item), "\n";
}