Files
htwkalender/backend/service/fetch/v3/paginatedFetch.go
2024-03-24 00:15:49 +01:00

104 lines
2.5 KiB
Go

package v3
import (
"encoding/json"
"log/slog"
"net/http"
"strconv"
"strings"
"sync"
)
// hydraResponse is the subset of a Hydra (JSON-LD) collection response that
// the pagination logic needs: the total item count and the paging view links.
// NOTE(review): API Platform usually serializes the count as "hydra:totalItems",
// not "totalItems" — verify against an actual response before relying on TotalItems.
type hydraResponse struct {
TotalItems int `json:"totalItems"`
View hydraView `json:"hydra:view"`
}
// hydraView holds the paging navigation links of a Hydra collection
// ("hydra:view"). Each field is a URL containing a "page=" query parameter;
// Last is used to determine how many pages must be fetched.
type hydraView struct {
First string `json:"hydra:first"`
Last string `json:"hydra:last"`
Next string `json:"hydra:next"`
}
// paginatedFetch retrieves every page of a paginated Hydra API endpoint and
// returns the raw JSON body of each successfully fetched page, first page first.
//
// The endpoint is paginated via query parameters, e.g.:
//
//	https://luna.htwk-leipzig.de/api/studiengangstypen?page=1&itemsPerPage=100
//
// The first page is fetched synchronously to learn the last page number from
// the hydra:view metadata; the remaining pages are fetched concurrently by a
// bounded pool of worker goroutines. Pages that fail to download are logged
// (inside requestPage) and omitted from the result rather than returned as
// empty strings. An error is returned only if the first page cannot be
// fetched or parsed.
func paginatedFetch(url string, itemsPerPage int, client *http.Client) ([]string, error) {
	const firstPage = 1
	link := url + "?page=" + strconv.Itoa(firstPage) + "&itemsPerPage=" + strconv.Itoa(itemsPerPage)
	response, err := requestPage(link, client)
	if err != nil {
		slog.Error("Error while fetching paginated api", "error", err, "url", link)
		return nil, err
	}

	// Extract the last page number from the hydra:view paging links.
	var hydra hydraResponse
	if err := json.Unmarshal([]byte(response), &hydra); err != nil {
		slog.Error("Error while unmarshalling hydra response", "error", err, "url", link)
		return nil, err
	}
	lastPage := extractPageNumber(hydra.View.Last)

	responses := make([]string, 0, lastPage)
	responses = append(responses, response)

	// Pre-build the links for the remaining pages (2..lastPage).
	var links []string
	for page := firstPage + 1; page <= lastPage; page++ {
		links = append(links, url+"?page="+strconv.Itoa(page)+"&itemsPerPage="+strconv.Itoa(itemsPerPage))
	}

	// Fetch the remaining pages concurrently with a bounded worker pool.
	// Each worker handles every maxThreads-th link (strided partition), so
	// no two goroutines ever write the same slice index.
	const maxThreads = 20
	pages := make([]string, len(links))
	var wg sync.WaitGroup
	wg.Add(maxThreads)
	for i := 0; i < maxThreads; i++ {
		go func(offset int) {
			defer wg.Done()
			for j := offset; j < len(links); j += maxThreads {
				doc, err := requestPage(links[j], client)
				if err == nil {
					pages[j] = doc
				}
				// On error, requestPage has already logged; the slot stays
				// empty and is filtered out below.
			}
		}(i)
	}
	wg.Wait()

	// Append only pages that were actually fetched, preserving page order.
	for _, page := range pages {
		if page != "" {
			responses = append(responses, page)
		}
	}
	return responses, nil
}
// requestPage fetches the given URL with the supplied client and returns the
// raw response body. It is a thin wrapper around requestJSON that logs the
// failing URL before propagating the error to the caller.
func requestPage(url string, client *http.Client) (string, error) {
	response, err := requestJSON(url, client)
	if err != nil {
		// slog takes alternating key/value pairs; passing err positionally
		// would be reported as !BADKEY.
		slog.Error("Error while fetching paginated api", "error", err, "url", url)
		return "", err
	}
	return response, nil
}
// extractPageNumber parses the value of the "page" query parameter from a
// paginated URL, e.g. ".../api/foo?page=5&itemsPerPage=100" yields 5.
// It returns 0 when the URL is empty or no valid page number can be found,
// which callers treat as "no further pages".
func extractPageNumber(url string) int {
	if url == "" {
		return 0
	}
	// Locate the last "page=" occurrence (the match is case-sensitive, so
	// "itemsPerPage=" does not collide).
	idx := strings.LastIndex(url, "page=")
	if idx == -1 {
		return 0
	}
	value := url[idx+len("page="):]
	// Cut the value at the next query-parameter separator; otherwise a URL
	// like "?page=3&itemsPerPage=100" would fail to parse and return 0.
	if amp := strings.IndexByte(value, '&'); amp != -1 {
		value = value[:amp]
	}
	pageNumber, err := strconv.Atoi(value)
	if err != nil {
		slog.Error("Error while extracting page number", "error", err, "url", url)
		return 0
	}
	return pageNumber
}