All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefügt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benötigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefügt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
79 lines
2.4 KiB
Go
79 lines
2.4 KiB
Go
// Package staff provides university staff and publication crawling functionality
|
|
package staff
|
|
|
|
import (
|
|
"context"
|
|
"log"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
|
|
"github.com/breakpilot/edu-search-service/internal/database"
|
|
"github.com/breakpilot/edu-search-service/internal/orchestrator"
|
|
)
|
|
|
|
// PublicationOrchestratorAdapter adapts publication crawling to the orchestrator interface.
// Note: This is a stub for now - publication crawling is a future feature.
type PublicationOrchestratorAdapter struct {
	// repo gives access to staff records and their stored publications
	// (used here via SearchStaff and GetStaffPublications).
	repo *database.Repository
}
|
|
|
|
// NewPublicationOrchestratorAdapter creates a new publication crawler adapter
|
|
func NewPublicationOrchestratorAdapter(repo *database.Repository) *PublicationOrchestratorAdapter {
|
|
return &PublicationOrchestratorAdapter{
|
|
repo: repo,
|
|
}
|
|
}
|
|
|
|
// CrawlPublicationsForUniversity crawls publications for all staff at a university
|
|
// This is Phase 4: Publication discovery (future implementation)
|
|
func (a *PublicationOrchestratorAdapter) CrawlPublicationsForUniversity(ctx context.Context, universityID uuid.UUID) (*orchestrator.CrawlProgress, error) {
|
|
start := time.Now()
|
|
progress := &orchestrator.CrawlProgress{
|
|
Phase: orchestrator.PhasePublications,
|
|
StartedAt: start,
|
|
}
|
|
|
|
log.Printf("[PublicationAdapter] Publications phase for university %s", universityID)
|
|
|
|
// Get staff members for this university
|
|
staffList, err := a.repo.SearchStaff(ctx, database.StaffSearchParams{
|
|
UniversityID: &universityID,
|
|
Limit: 10000,
|
|
})
|
|
if err != nil {
|
|
progress.Errors = append(progress.Errors, err.Error())
|
|
return progress, err
|
|
}
|
|
|
|
log.Printf("[PublicationAdapter] Found %d staff members for publication crawling", staffList.Total)
|
|
|
|
// TODO: Implement actual publication crawling
|
|
// - For each staff member with ORCID/Google Scholar ID:
|
|
// - Fetch publications from ORCID API
|
|
// - Fetch publications from Google Scholar
|
|
// - Match and deduplicate
|
|
// - Store in database
|
|
//
|
|
// For now, we mark this phase as complete (no-op)
|
|
|
|
pubCount := 0
|
|
|
|
// Count existing publications for this university
|
|
for _, staff := range staffList.Staff {
|
|
pubs, err := a.repo.GetStaffPublications(ctx, staff.ID)
|
|
if err == nil {
|
|
pubCount += len(pubs)
|
|
}
|
|
}
|
|
|
|
progress.ItemsFound = pubCount
|
|
progress.ItemsProcessed = staffList.Total
|
|
now := time.Now()
|
|
progress.CompletedAt = &now
|
|
|
|
log.Printf("[PublicationAdapter] Publications phase completed for university %s: %d existing publications found", universityID, pubCount)
|
|
|
|
return progress, nil
|
|
}
|