feat:#39 added fetcher for Events
backend/service/fetch/v3/paginatedFetch.go (new file, 104 lines)
@@ -0,0 +1,104 @@
package v3

import (
	"encoding/json"
	"log/slog"
	"net/http"
	"strconv"
	"strings"
	"sync"
)

type hydraResponse struct {
	TotalItems int       `json:"totalItems"`
	View       hydraView `json:"hydra:view"`
}

type hydraView struct {
	First string `json:"hydra:first"`
	Last  string `json:"hydra:last"`
	Next  string `json:"hydra:next"`
}
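
// For reference, these structs decode the paging metadata of an API
// Platform ("hydra") response; the shape below follows the JSON tags
// above, with illustrative values rather than real API output:
//
//	{
//	  "totalItems": 250,
//	  "hydra:view": {
//	    "hydra:first": "/api/studiengangstypen?page=1&itemsPerPage=100",
//	    "hydra:last": "/api/studiengangstypen?page=3&itemsPerPage=100",
//	    "hydra:next": "/api/studiengangstypen?page=2&itemsPerPage=100"
//	  }
//	}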

func paginatedFetch(url string, itemsPerPage int, client *http.Client) ([]string, error) {

	// the URL is paginated, so all pages have to be fetched
	// example URL: https://luna.htwk-leipzig.de/api/studiengangstypen?page=1&itemsPerPage=100
	// the first response carries the hydra paging metadata, which tells us the last page

	var firstPage = 1
	var responses []string
	link := url + "?page=" + strconv.Itoa(firstPage) + "&itemsPerPage=" + strconv.Itoa(itemsPerPage)
	response, err := requestPage(link, client)

	if err != nil {
		slog.Error("error while fetching paginated API", "error", err)
		return nil, err
	}

	// extract the last page number from the first response
	var hydra hydraResponse
	err = json.Unmarshal([]byte(response), &hydra)
	if err != nil {
		slog.Error("error while unmarshalling hydra response", "error", err, "link", link)
		return nil, err
	}
	var lastPage = extractPageNumber(hydra.View.Last)
	responses = append(responses, response)

	// prepare the links for the concurrent requests
	var links []string
	for i := firstPage + 1; i <= lastPage; i++ {
		link := url + "?page=" + strconv.Itoa(i) + "&itemsPerPage=" + strconv.Itoa(itemsPerPage)
		links = append(links, link)
	}

	// fetch the remaining pages concurrently to speed up the process;
	// each goroutine handles every maxThreads-th link, so every slot of
	// htmlPageArray is written by exactly one goroutine
	var maxThreads = 20
	var htmlPageArray = make([]string, len(links))

	var wg sync.WaitGroup
	wg.Add(maxThreads)
	for i := 0; i < maxThreads; i++ {
		go func(i int) {
			for j := i; j < len(links); j += maxThreads {
				slog.Info("fetching page " + strconv.Itoa(j+1) + " of " + strconv.Itoa(len(links)))
				doc, err := requestPage(links[j], client)
				if err == nil {
					htmlPageArray[j] = doc
				}
			}
			wg.Done()
		}(i)
	}
	wg.Wait()

	responses = append(responses, htmlPageArray...)

	return responses, nil
}

func requestPage(url string, client *http.Client) (string, error) {
	// requestJSON is not part of this diff; it is defined elsewhere in the package
	response, err := requestJSON(url, client)
	if err != nil {
		slog.Error("error while fetching paginated API", "error", err)
		return "", err
	}
	return response, nil
}
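
// Since requestJSON is not shown in this commit, here is a minimal sketch
// of what such a helper could look like, assuming it issues a plain GET
// and returns the response body as a string. The name requestJSONSketch
// and the Accept header value are assumptions, not the repository's
// actual code; it additionally needs the "fmt" and "io" imports.
func requestJSONSketch(url string, client *http.Client) (string, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("Accept", "application/ld+json")
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("unexpected status %d for %s", resp.StatusCode, url)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(body), nil
}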

func extractPageNumber(url string) int {

	if url == "" {
		return 0
	}
	// take everything after the last occurrence of "page="; this assumes
	// the page number is the final query parameter, otherwise the Atoi
	// below fails and 0 is returned
	split := strings.Split(url, "page=")
	lastPart := split[len(split)-1]
	pageNumber, err := strconv.Atoi(lastPart)
	if err != nil {
		slog.Error("error while extracting page number", "error", err)
		return 0
	}
	return pageNumber
}
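
// A hypothetical usage sketch, not part of this commit: fetching every
// page of the example endpoint mentioned above. The function name
// fetchAllPages is an assumption; the page size of 100 follows the
// example URL in paginatedFetch.
func fetchAllPages() ([]string, error) {
	return paginatedFetch("https://luna.htwk-leipzig.de/api/studiengangstypen", 100, http.DefaultClient)
}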