Files
breakpilot-lehrer/edu-search-service/internal/staff/publication_adapter.go
Benjamin Boenisch 414e0f5ec0
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
feat: edu-search-service migriert, voice-service/geo-service entfernt
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor)
- opensearch + edu-search-service in docker-compose.yml hinzugefuegt
- voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core)
- geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt)
- CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt
  (Go lint, test mit go mod download, build, SBOM)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 18:36:38 +01:00

79 lines
2.4 KiB
Go

// Package staff provides university staff and publication crawling functionality
package staff
import (
"context"
"log"
"time"
"github.com/google/uuid"
"github.com/breakpilot/edu-search-service/internal/database"
"github.com/breakpilot/edu-search-service/internal/orchestrator"
)
// PublicationOrchestratorAdapter adapts publication crawling to the orchestrator interface.
//
// Note: This is a stub for now - publication crawling is a future feature.
// The adapter only reads existing data through the repository; it does not
// yet fetch anything from external publication sources.
type PublicationOrchestratorAdapter struct {
	// repo provides access to staff and publication records in the database.
	repo *database.Repository
}
// NewPublicationOrchestratorAdapter creates a new publication crawler adapter
// backed by the given repository.
func NewPublicationOrchestratorAdapter(repo *database.Repository) *PublicationOrchestratorAdapter {
	adapter := &PublicationOrchestratorAdapter{repo: repo}
	return adapter
}
// CrawlPublicationsForUniversity crawls publications for all staff at a university.
// This is Phase 4: Publication discovery (future implementation).
//
// Currently a stub: it looks up the university's staff, counts the publications
// already stored for them, and reports the phase as complete. Per-staff lookup
// failures are recorded in progress.Errors but do not abort the phase
// (best-effort counting); only a failed staff search returns an error.
func (a *PublicationOrchestratorAdapter) CrawlPublicationsForUniversity(ctx context.Context, universityID uuid.UUID) (*orchestrator.CrawlProgress, error) {
	progress := &orchestrator.CrawlProgress{
		Phase:     orchestrator.PhasePublications,
		StartedAt: time.Now(),
	}
	log.Printf("[PublicationAdapter] Publications phase for university %s", universityID)

	// Get staff members for this university.
	staffList, err := a.repo.SearchStaff(ctx, database.StaffSearchParams{
		UniversityID: &universityID,
		Limit:        10000,
	})
	if err != nil {
		progress.Errors = append(progress.Errors, err.Error())
		return progress, err
	}
	log.Printf("[PublicationAdapter] Found %d staff members for publication crawling", staffList.Total)

	// TODO: Implement actual publication crawling
	// - For each staff member with ORCID/Google Scholar ID:
	//   - Fetch publications from ORCID API
	//   - Fetch publications from Google Scholar
	//   - Match and deduplicate
	//   - Store in database
	//
	// For now, we mark this phase as complete (no-op).

	// Count existing publications for this university.
	pubCount := 0
	for _, staff := range staffList.Staff {
		pubs, err := a.repo.GetStaffPublications(ctx, staff.ID)
		if err != nil {
			// Best-effort: record the failure instead of silently
			// undercounting, then keep going with the remaining staff.
			progress.Errors = append(progress.Errors, err.Error())
			continue
		}
		pubCount += len(pubs)
	}

	progress.ItemsFound = pubCount
	progress.ItemsProcessed = staffList.Total
	now := time.Now()
	progress.CompletedAt = &now
	log.Printf("[PublicationAdapter] Publications phase completed for university %s: %d existing publications found", universityID, pubCount)
	return progress, nil
}