All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefügt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benötigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefügt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
79 lines
2.4 KiB
Go
79 lines
2.4 KiB
Go
// Package staff provides university staff and publication crawling functionality
|
|
package staff
|
|
|
|
import (
|
|
"context"
|
|
"log"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
|
|
"github.com/breakpilot/edu-search-service/internal/database"
|
|
"github.com/breakpilot/edu-search-service/internal/orchestrator"
|
|
)
|
|
|
|
// PublicationOrchestratorAdapter adapts publication crawling to the orchestrator interface.
// Note: This is a stub for now - publication crawling is a future feature.
type PublicationOrchestratorAdapter struct {
	// repo gives access to staff records and their stored publications
	// (used here via SearchStaff and GetStaffPublications).
	repo *database.Repository
}
|
|
|
|
// NewPublicationOrchestratorAdapter creates a new publication crawler adapter
|
|
func NewPublicationOrchestratorAdapter(repo *database.Repository) *PublicationOrchestratorAdapter {
|
|
return &PublicationOrchestratorAdapter{
|
|
repo: repo,
|
|
}
|
|
}
|
|
|
|
// CrawlPublicationsForUniversity crawls publications for all staff at a university
|
|
// This is Phase 4: Publication discovery (future implementation)
|
|
func (a *PublicationOrchestratorAdapter) CrawlPublicationsForUniversity(ctx context.Context, universityID uuid.UUID) (*orchestrator.CrawlProgress, error) {
|
|
start := time.Now()
|
|
progress := &orchestrator.CrawlProgress{
|
|
Phase: orchestrator.PhasePublications,
|
|
StartedAt: start,
|
|
}
|
|
|
|
log.Printf("[PublicationAdapter] Publications phase for university %s", universityID)
|
|
|
|
// Get staff members for this university
|
|
staffList, err := a.repo.SearchStaff(ctx, database.StaffSearchParams{
|
|
UniversityID: &universityID,
|
|
Limit: 10000,
|
|
})
|
|
if err != nil {
|
|
progress.Errors = append(progress.Errors, err.Error())
|
|
return progress, err
|
|
}
|
|
|
|
log.Printf("[PublicationAdapter] Found %d staff members for publication crawling", staffList.Total)
|
|
|
|
// TODO: Implement actual publication crawling
|
|
// - For each staff member with ORCID/Google Scholar ID:
|
|
// - Fetch publications from ORCID API
|
|
// - Fetch publications from Google Scholar
|
|
// - Match and deduplicate
|
|
// - Store in database
|
|
//
|
|
// For now, we mark this phase as complete (no-op)
|
|
|
|
pubCount := 0
|
|
|
|
// Count existing publications for this university
|
|
for _, staff := range staffList.Staff {
|
|
pubs, err := a.repo.GetStaffPublications(ctx, staff.ID)
|
|
if err == nil {
|
|
pubCount += len(pubs)
|
|
}
|
|
}
|
|
|
|
progress.ItemsFound = pubCount
|
|
progress.ItemsProcessed = staffList.Total
|
|
now := time.Now()
|
|
progress.CompletedAt = &now
|
|
|
|
log.Printf("[PublicationAdapter] Publications phase completed for university %s: %d existing publications found", universityID, pubCount)
|
|
|
|
return progress, nil
|
|
}
|