package v3

import (
	"encoding/json"
	"log/slog"
	"net/http"
	"net/url"
	"strconv"
	"sync"
)

// hydraResponse models the subset of a Hydra (JSON-LD) collection
// response that pagination needs: the total item count and the
// "hydra:view" block carrying the pagination links.
type hydraResponse struct {
	TotalItems int       `json:"totalItems"`
	View       hydraView `json:"hydra:view"`
}

// hydraView holds the pagination links of a Hydra collection view.
type hydraView struct {
	First string `json:"hydra:first"`
	Last  string `json:"hydra:last"`
	Next  string `json:"hydra:next"`
}

// paginatedFetch downloads every page of a Hydra-paginated API endpoint
// and returns the raw JSON body of each successfully fetched page, first
// page first.
//
// Example page URL:
//
//	https://luna.htwk-leipzig.de/api/studiengangstypen?page=1&itemsPerPage=100
//
// The first page is fetched synchronously so its hydra:view can tell us
// the last page number; the remaining pages are then fetched concurrently
// with a bounded worker pool. Pages that fail to download are logged and
// omitted from the result (best effort), so callers never see empty bodies.
func paginatedFetch(baseURL string, itemsPerPage int, client *http.Client) ([]string, error) {
	const firstPage = 1

	pageLink := func(page int) string {
		return baseURL + "?page=" + strconv.Itoa(page) + "&itemsPerPage=" + strconv.Itoa(itemsPerPage)
	}

	// Fetch page 1 synchronously; its hydra:view reveals the total page count.
	first := pageLink(firstPage)
	response, err := requestPage(first, client)
	if err != nil {
		slog.Error("Error while fetching paginated api", "url", first, "error", err)
		return nil, err
	}

	var hydra hydraResponse
	if err := json.Unmarshal([]byte(response), &hydra); err != nil {
		slog.Error("Error while unmarshalling hydra response", "url", first, "error", err)
		return nil, err
	}
	lastPage := extractPageNumber(hydra.View.Last)

	responses := []string{response}

	// Build the remaining page URLs up front so the workers below can
	// share them by index without further coordination.
	var links []string
	for page := firstPage + 1; page <= lastPage; page++ {
		links = append(links, pageLink(page))
	}

	// Fetch the remaining pages with a bounded pool of goroutines. Each
	// worker strides through the link list (worker i handles indices
	// i, i+maxThreads, ...) and writes into its own slots of `pages`,
	// so no locking is needed and page order is preserved.
	const maxThreads = 20
	pages := make([]string, len(links))
	var wg sync.WaitGroup
	wg.Add(maxThreads)
	for w := 0; w < maxThreads; w++ {
		go func(start int) {
			defer wg.Done()
			for j := start; j < len(links); j += maxThreads {
				doc, err := requestPage(links[j], client)
				if err != nil {
					// Best effort: log the failure and leave the slot empty.
					slog.Error("Error while fetching paginated api", "url", links[j], "error", err)
					continue
				}
				pages[j] = doc
			}
		}(w)
	}
	wg.Wait()

	// Drop pages that failed so the result contains only valid bodies.
	for _, page := range pages {
		if page != "" {
			responses = append(responses, page)
		}
	}
	return responses, nil
}

// requestPage fetches a single page and returns its raw body. It is a
// thin wrapper around requestJSON; the error is returned unwrapped and
// logging is left to the caller so each failure is reported exactly once.
func requestPage(url string, client *http.Client) (string, error) {
	return requestJSON(url, client)
}

// extractPageNumber returns the value of the "page" query parameter of
// link, or 0 if link is empty, unparsable, or the parameter is missing
// or not a number. Using net/url (rather than splitting on the literal
// substring "page=") is robust against the parameter appearing in any
// position of the query string.
func extractPageNumber(link string) int {
	if link == "" {
		return 0
	}
	parsed, err := url.Parse(link)
	if err != nil {
		slog.Error("Error while extracting page number", "link", link, "error", err)
		return 0
	}
	pageNumber, err := strconv.Atoi(parsed.Query().Get("page"))
	if err != nil {
		slog.Error("Error while extracting page number", "link", link, "error", err)
		return 0
	}
	return pageNumber
}