Pagination

Some endpoints return multiple items at once. In that case, pagination.previous and pagination.next cursor URLs are returned in the response when relevant. You can use these cursors to navigate in both directions through the result items. A count parameter is also exposed to define how many items to return.
Note that these cursors are only valid for around 2 minutes, so they are only suited for iterating through a large set of results programmatically.

To help you get started, here are some functions for your language of choice:

#!/usr/bin/env python3

import json
import requests
import urllib.parse
import time

def iterate(url, query=None, max_retries=5, timeout_seconds=10, retry_sleep_seconds=1, session=None):
    """
    Iterates over each result of the given paginated resource until its end, yielding each item in turn.
    You can iterate over the results of this method across many pages of results transparently.

    Each page is fetched with an HTTP GET. The items are read from the `data` array of the JSON
    response, and the URL of the following page is read from `pagination.next`; iteration stops
    when that value is null (or empty).

    Nothing is written to the standard output by this function, so that callers can safely
    use it to produce their own output.

    Args:
        url(str): The URL of the initial page of results to iterate on.
        query(dict): Helper argument to overwrite the query string portion of the url.
            Keys given here replace keys of the same name already present in `url`.
        max_retries(int): The maximum number of tries to do for each page. Must be positive.
        timeout_seconds(float): The maximum time to wait for fetching each page.
        retry_sleep_seconds(float): The time to wait before retrying fetching a page.
        session(requests.Session): An optional existing requests Session for connection pooling and keepalive.
            When omitted, a session is created and closed once the iteration ends.

    Yields:
        dict: Each item of the paginated resource, typically a parsed JSON object, given as a dictionary object.

    Raises:
        ValueError: If max_retries is not positive.
        requests.exceptions.HTTPError: If any non retryable HTTP error is encountered.
        Exception: After the maximum number of tries is exhausted, the last encountered exception is re-raised.

    Example:
        for installation in iterate('https://management-api.wonderpush.com/v1/installations', query={'accessToken': YOUR_ACCESS_TOKEN, 'limit': 1000}):
            print(installation.get('id'))
    """
    if max_retries <= 0:
        raise ValueError('max_retries must be positive')

    # Only close the session at the end if we created it ourselves
    owns_session = session is None
    if owns_session:
        # Create a requests session to allow for connection keepalive
        session = requests.Session()

    # Facilitate the construction of the URL query string
    if query is not None:
        url_parts = urllib.parse.urlparse(url)
        # keep_blank_values: do not silently drop "key=" parameters already present in the URL
        query_dict = urllib.parse.parse_qs(url_parts.query, keep_blank_values=True)
        query_dict.update(query)
        url_parts = url_parts._replace(query=urllib.parse.urlencode(query_dict, doseq=True))
        url = urllib.parse.urlunparse(url_parts)

    try:
        # Loop over pages of results until pagination.next is null
        while url:
            data = None
            last_exception = None
            tries = 0
            # Retry loop: only the network fetch and JSON decoding are retried
            while tries < max_retries:
                try:
                    response = session.get(url, timeout=timeout_seconds)
                    response.raise_for_status()
                    data = response.json()
                    break
                except requests.exceptions.HTTPError as e:
                    status_code = e.response.status_code
                    # Only rate limiting (429) and server errors (5xx) are worth retrying
                    if status_code != 429 and status_code < 500:
                        raise
                    last_exception = e
                except requests.exceptions.RequestException as e:
                    last_exception = e
                tries += 1
                # No need to wait after the very last failed try
                if tries < max_retries:
                    time.sleep(retry_sleep_seconds)
            else:
                # Too many tries: re-raise the last exception. The `as e` names are
                # unbound once their except block ends, hence the dedicated variable.
                raise last_exception

            # Yield outside of the retry loop so that a page is never emitted twice
            for item in data['data']:
                yield item
            # Get the URL for the next page of results
            url = data['pagination']['next']
    finally:
        if owns_session:
            session.close()

#
# EXAMPLE
#

# Export all installations in a single JSON document per line on standard output
# Placeholder: replace with your own Management API access token
YOUR_ACCESS_TOKEN = '…'
for item in iterate('https://management-api.wonderpush.com/v1/installations', query={'accessToken': YOUR_ACCESS_TOKEN, 'limit': 1000}):
    print(json.dumps(item))

#!/bin/sh

# Prints the help text of this script on the standard output, one line per argument.
usage() {
	printf '%s\n' \
		"Usage: $0 url" \
		"Iterates over each result of the given paginated resource until its end," \
		"writing one item per line on the standard output." \
		"" \
		"Arguments:" \
		"    url    The URL of the paginated resource to iterate onto." \
		"" \
		"In case of error, outputs the received body on the standard error output" \
		"and exits with curl's exit code."
}

# Show the help when asked for, or complain when the single url argument is missing.
# Only "=" is a portable string comparison inside [ ]: "==" fails with /bin/sh
# implementations such as dash, and "-o" is marked obsolescent by POSIX.
if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
	usage
	exit 0
elif [ $# -ne 1 ]; then
	usage >&2
	exit 1
fi

# Fetches the given URL and every following page (taken from .pagination.next),
# writing each item of the .data array on its own line on the standard output.
# Exits the script with the failing command's exit code on error.
iterate() {
	# Initial URL
	url="$1"
	# Iterate until we've reached the end
	while [ -n "$url" ]; do
		# Request one page
		response="$(curl -s --fail-with-body "$url")"
		# Handle curl errors
		exitcode=$?
		if [ $exitcode -ne 0 ]; then
			# Write the error on the standard error output and exit.
			# printf is used because "echo -n" is not portable across /bin/sh implementations.
			printf '%s' "$response" >&2
			exit $exitcode
		fi
		# Extract the next URL. Transform null into "".
		# printf is used instead of echo because some shells make echo interpret
		# backslash escapes, which would corrupt the JSON document.
		# Stop on jq failure rather than silently ending the iteration.
		url="$(printf '%s\n' "$response" | jq -rc '.pagination.next | if . == null then "" else . end')" || exit $?
		# Write each item of the page in the standard output
		printf '%s\n' "$response" | jq -rc '.data[]' || exit $?
	done
}

iterate "$1"

#
# EXAMPLE
#

# Runs the script on the installations endpoint; the YOUR_ACCESS_TOKEN shell
# variable must hold your access token before running this command.
./iterate.sh "https://management-api.wonderpush.com/v1/installations?accessToken=${YOUR_ACCESS_TOKEN}&limit=1000"

#!/usr/bin/env php
<?php

/**
 * Sends an HTTPS GET request to a given URL, decodes the JSON response, yields each of the objects of the `data` array field,
 * and iterates on the next page of results by taking the URL inside the `pagination.next` field until it is `null`.
 *
 * Retries are counted per page: a successfully fetched page resets the counter.
 * The cURL handle is always released, even if the caller stops iterating early.
 *
 * @param string $url The URL to fetch data from.
 * @param array $options An optional array of options to configure the function:
 *   - query: An associative array to help you construct the desired URL to fetch.
 *            Its entries override parameters of the same name already present in the URL.
 *   - timeout: The maximum number of seconds to wait for a response (default: 10).
 *   - max_retries: The maximum number of times to try each request if it fails with a cURL error or a 429 or 5xx HTTP status code (default: 3).
 *   - retry_sleep: The number of seconds to wait before retrying a failed request, may be fractional (default: 1).
 * @return \Generator A generator that yields each item in the `data` array field of the JSON response.
 * @throws \Exception If the request fails or the response is not a valid JSON string with a `data` array field.
 */
function iterate($url, $options = []) {
    if (isset($options['query']) && is_array($options['query'])) {
        // Merge the existing query string with the new parameters
        $url_parts = parse_url($url);
        $query_params = [];
        if (!empty($url_parts['query'])) {
            parse_str($url_parts['query'], $query_params);
        }
        $query_params = array_merge($query_params, $options['query']);
        // Rebuild the query string
        $url_parts['query'] = http_build_query($query_params);
        // Reconstruct the URL with the new query string.
        // parse_url() omits the keys of absent components, hence the isset()/empty() checks.
        $url = sprintf('%s://%s%s%s%s',
             $url_parts['scheme'],
             $url_parts['host'],
             !empty($url_parts['port']) ? ':' . $url_parts['port'] : '',
             isset($url_parts['path']) ? $url_parts['path'] : '',
             !empty($url_parts['query']) ? '?' . $url_parts['query'] : ''
        );
    }
    $timeout = isset($options['timeout']) ? $options['timeout'] : 10;
    $max_retries = isset($options['max_retries']) ? $options['max_retries'] : 3;
    $retry_sleep = isset($options['retry_sleep']) ? $options['retry_sleep'] : 1;

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TCP_KEEPALIVE, true);
    curl_setopt($ch, CURLOPT_TCP_KEEPIDLE, 60); // in seconds
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
    try {
        while ($url !== null) {
            // The retry budget applies to each page independently
            $retries = 0;
            while (true) {
                curl_setopt($ch, CURLOPT_URL, $url);
                $response = curl_exec($ch);
                $errno = curl_errno($ch);
                if ($errno !== 0) {
                    $failure = "cURL errno {$errno}: " . curl_error($ch);
                } else {
                    $http_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
                    if ($http_status === 429 || $http_status >= 500) {
                        // Rate limiting and server errors are transient: retry
                        $failure = "HTTP status {$http_status}";
                    } else if ($http_status !== 200) {
                        throw new Exception("HTTP error: " . $http_status);
                    } else {
                        // Success, leave the retry loop
                        break;
                    }
                }
                $retries++;
                if ($retries >= $max_retries) {
                    throw new Exception("API request failed after {$retries} retries");
                }
                error_log("API request failed with {$failure}, retrying ({$retries}/{$max_retries})");
                // Give the server some time before trying again
                usleep((int) ($retry_sleep * 1000000));
            }

            // A valid response is a JSON object, decoded as an array.
            // Do not rely on truthiness: an empty JSON document is falsy yet valid.
            $json = json_decode($response, true);
            if (!is_array($json)) {
                throw new Exception("Failed to decode JSON response");
            }

            if (!isset($json['data']) || !is_array($json['data'])) {
                throw new Exception("Missing or invalid 'data' field in JSON response");
            }

            foreach ($json['data'] as $item) {
                yield $item;
            }

            // isset() is false for both a missing and a null pagination.next
            $url = isset($json['pagination']['next']) ? $json['pagination']['next'] : null;
        }
    } finally {
        // Runs on normal completion, on exceptions, and when the generator is destroyed early
        curl_close($ch);
    }
}

//
// Example
//

$YOUR_ACCESS_TOKEN = '…';
$installations = iterate(
    'https://management-api.wonderpush.com/v1/installations',
    ['query' => ['accessToken' => $YOUR_ACCESS_TOKEN, 'limit' => 1000]]
);
// Write each installation as a single JSON document per line on the standard output
foreach ($installations as $item) {
    echo json_encode($item) . "\n";
}