package v3

import (
	"encoding/json"
	"log/slog"
	"net/http"
	"strconv"
	"strings"
	"sync"
)

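// hydraResponse models the subset of the Hydra (JSON-LD) collection envelope that
// the API returns: the total number of items and the hydra:view object (hydraView)
// whose hydra:first, hydra:last and hydra:next links point to the respective pages.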
type hydraResponse struct {
	TotalItems int       `json:"totalItems"`
	View       hydraView `json:"hydra:view"`
}

type hydraView struct {
	First string `json:"hydra:first"`
	Last  string `json:"hydra:last"`
	Next  string `json:"hydra:next"`
}

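// paginatedFetch downloads every page of the paginated collection behind url. It
// requests the first page, reads the hydra:last link to determine how many pages
// exist, fetches the remaining pages concurrently and returns the raw JSON body of
// each page.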
func paginatedFetch(url string, itemsPerPage int, client *http.Client) ([]string, error) {

	// the url is paginated, so we need to fetch all pages
	// example url: https://luna.htwk-leipzig.de/api/studiengangstypen?page=1&itemsPerPage=100
	// the first response carries the hydra:view links, which tell us how many pages exist in total

	var firstPage = 1
	var responses []string
	link := url + "?page=" + strconv.Itoa(firstPage) + "&itemsPerPage=" + strconv.Itoa(itemsPerPage)
	response, err := requestPage(link, client)

	if err != nil {
		slog.Error("Error while fetching paginated api", "error", err)
		return nil, err
	}

	// extract the last page number from the hydra:view of the first response
	var hydra hydraResponse
	err = json.Unmarshal([]byte(response), &hydra)
	if err != nil {
		slog.Error("Error while unmarshalling hydra response", "error", err, "url", link)
		return nil, err
	}
	var lastPage = extractPageNumber(hydra.View.Last)
	responses = append(responses, response)

	// prepare the links for the multithreaded requests
	var links []string
	for i := firstPage + 1; i <= lastPage; i++ {
		link := url + "?page=" + strconv.Itoa(i) + "&itemsPerPage=" + strconv.Itoa(itemsPerPage)
		links = append(links, link)
	}

	// multithreading webpage requests to speed up the process
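	// each worker goroutine i fetches links[i], links[i+maxThreads], links[i+2*maxThreads], ...
	// so the remaining pages are requested by a fixed-size pool of goroutines; pages whose
	// request fails are left as empty strings in htmlPageArray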
	var maxThreads = 20
	var htmlPageArray = make([]string, len(links))

	var wg sync.WaitGroup
	wg.Add(maxThreads)
	for i := 0; i < maxThreads; i++ {
		go func(i int) {
			for j := i; j < len(links); j += maxThreads {
				doc, err := requestPage(links[j], client)
				if err == nil {
					htmlPageArray[j] = doc
				}
			}
			wg.Done()
		}(i)
	}
	wg.Wait()

	responses = append(responses, htmlPageArray...)

	return responses, nil
}

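// requestPage fetches a single page via requestJSON (assumed to be the HTTP helper
// defined elsewhere in this package) and returns the raw response body.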
func requestPage(url string, client *http.Client) (string, error) {
	response, err := requestJSON(url, client)
	if err != nil {
		slog.Error("Error while fetching paginated api", "error", err)
		return "", err
	}
	return response, nil
}

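// extractPageNumber pulls the page number out of a pagination link (e.g. the
// hydra:last URL) and returns 0 if the link is empty or no number can be parsed.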
func extractPageNumber(url string) int {

	if url == "" {
		return 0
	}
	split := strings.Split(url, "page=")
	lastPart := split[len(split)-1]
	// the page number may be followed by further query parameters, e.g. "5&itemsPerPage=100"
	if idx := strings.Index(lastPart, "&"); idx != -1 {
		lastPart = lastPart[:idx]
	}
	pageNumber, err := strconv.Atoi(lastPart)
	if err != nil {
		slog.Error("Error while extracting page number", "error", err)
		return 0
	}
	return pageNumber
}