package handlers import ( "net/http" "time" "github.com/gin-gonic/gin" "github.com/google/uuid" "github.com/breakpilot/edu-search-service/internal/database" ) // AIExtractionHandlers handles AI-based profile extraction endpoints // These endpoints are designed for vast.ai or similar AI services to: // 1. Get profile URLs that need extraction // 2. Submit extracted data back type AIExtractionHandlers struct { repo *database.Repository } // NewAIExtractionHandlers creates new AI extraction handlers func NewAIExtractionHandlers(repo *database.Repository) *AIExtractionHandlers { return &AIExtractionHandlers{repo: repo} } // ProfileExtractionTask represents a profile URL to be processed by AI type ProfileExtractionTask struct { StaffID uuid.UUID `json:"staff_id"` ProfileURL string `json:"profile_url"` UniversityID uuid.UUID `json:"university_id"` UniversityURL string `json:"university_url,omitempty"` FullName string `json:"full_name,omitempty"` CurrentData struct { Email string `json:"email,omitempty"` Phone string `json:"phone,omitempty"` Office string `json:"office,omitempty"` Position string `json:"position,omitempty"` Department string `json:"department,omitempty"` } `json:"current_data"` } // GetPendingProfiles returns staff profiles that need AI extraction // GET /api/v1/ai/extraction/pending?limit=10&university_id=... func (h *AIExtractionHandlers) GetPendingProfiles(c *gin.Context) { limit := parseIntDefault(c.Query("limit"), 10) if limit > 100 { limit = 100 } var universityID *uuid.UUID if uniIDStr := c.Query("university_id"); uniIDStr != "" { id, err := uuid.Parse(uniIDStr) if err == nil { universityID = &id } } // Get staff that have profile URLs but missing key data params := database.StaffSearchParams{ UniversityID: universityID, Limit: limit * 2, // Get more to filter } result, err := h.repo.SearchStaff(c.Request.Context(), params) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return } // Filter to only include profiles that need extraction var tasks []ProfileExtractionTask for _, staff := range result.Staff { // Skip if no profile URL if staff.ProfileURL == nil || *staff.ProfileURL == "" { continue } // Include if missing email or other important data needsExtraction := staff.Email == nil || *staff.Email == "" if needsExtraction { task := ProfileExtractionTask{ StaffID: staff.ID, ProfileURL: *staff.ProfileURL, UniversityID: staff.UniversityID, } if staff.FullName != nil { task.FullName = *staff.FullName } if staff.Email != nil { task.CurrentData.Email = *staff.Email } if staff.Phone != nil { task.CurrentData.Phone = *staff.Phone } if staff.Office != nil { task.CurrentData.Office = *staff.Office } if staff.Position != nil { task.CurrentData.Position = *staff.Position } if staff.DepartmentName != nil { task.CurrentData.Department = *staff.DepartmentName } tasks = append(tasks, task) if len(tasks) >= limit { break } } } c.JSON(http.StatusOK, gin.H{ "tasks": tasks, "total": len(tasks), }) } // ExtractedProfileData represents data extracted by AI from a profile page type ExtractedProfileData struct { StaffID uuid.UUID `json:"staff_id" binding:"required"` // Contact info Email string `json:"email,omitempty"` Phone string `json:"phone,omitempty"` Office string `json:"office,omitempty"` // Professional info Position string `json:"position,omitempty"` PositionType string `json:"position_type,omitempty"` // professor, researcher, phd_student, staff AcademicTitle string `json:"academic_title,omitempty"` IsProfessor *bool `json:"is_professor,omitempty"` DepartmentName string `json:"department_name,omitempty"` // Hierarchy SupervisorName string `json:"supervisor_name,omitempty"` TeamRole string `json:"team_role,omitempty"` // leitung, mitarbeiter, sekretariat, hiwi, doktorand // Research ResearchInterests []string `json:"research_interests,omitempty"` ResearchSummary string `json:"research_summary,omitempty"` // Teaching (Lehrveranstaltungen) TeachingTopics []string `json:"teaching_topics,omitempty"` // External profiles ORCID string `json:"orcid,omitempty"` GoogleScholarID string `json:"google_scholar_id,omitempty"` ResearchgateURL string `json:"researchgate_url,omitempty"` LinkedInURL string `json:"linkedin_url,omitempty"` PersonalWebsite string `json:"personal_website,omitempty"` PhotoURL string `json:"photo_url,omitempty"` // Institute/Department links discovered InstituteURL string `json:"institute_url,omitempty"` InstituteName string `json:"institute_name,omitempty"` // Confidence score (0-1) Confidence float64 `json:"confidence,omitempty"` } // SubmitExtractedData saves AI-extracted profile data // POST /api/v1/ai/extraction/submit func (h *AIExtractionHandlers) SubmitExtractedData(c *gin.Context) { var data ExtractedProfileData if err := c.ShouldBindJSON(&data); err != nil { c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request: " + err.Error()}) return } // Get existing staff record staff, err := h.repo.GetStaff(c.Request.Context(), data.StaffID) if err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "Staff not found"}) return } // Update fields if provided and not empty updated := false if data.Email != "" && (staff.Email == nil || *staff.Email == "") { staff.Email = &data.Email updated = true } if data.Phone != "" && (staff.Phone == nil || *staff.Phone == "") { staff.Phone = &data.Phone updated = true } if data.Office != "" && (staff.Office == nil || *staff.Office == "") { staff.Office = &data.Office updated = true } if data.Position != "" && (staff.Position == nil || *staff.Position == "") { staff.Position = &data.Position updated = true } if data.PositionType != "" && (staff.PositionType == nil || *staff.PositionType == "") { staff.PositionType = &data.PositionType updated = true } if data.AcademicTitle != "" && (staff.AcademicTitle == nil || *staff.AcademicTitle == "") { staff.AcademicTitle = &data.AcademicTitle updated = true } if data.IsProfessor != nil { staff.IsProfessor = *data.IsProfessor updated = true } if data.TeamRole != "" && (staff.TeamRole == nil || *staff.TeamRole == "") { staff.TeamRole = &data.TeamRole updated = true } if len(data.ResearchInterests) > 0 && len(staff.ResearchInterests) == 0 { staff.ResearchInterests = data.ResearchInterests updated = true } if data.ResearchSummary != "" && (staff.ResearchSummary == nil || *staff.ResearchSummary == "") { staff.ResearchSummary = &data.ResearchSummary updated = true } if data.ORCID != "" && (staff.ORCID == nil || *staff.ORCID == "") { staff.ORCID = &data.ORCID updated = true } if data.GoogleScholarID != "" && (staff.GoogleScholarID == nil || *staff.GoogleScholarID == "") { staff.GoogleScholarID = &data.GoogleScholarID updated = true } if data.ResearchgateURL != "" && (staff.ResearchgateURL == nil || *staff.ResearchgateURL == "") { staff.ResearchgateURL = &data.ResearchgateURL updated = true } if data.LinkedInURL != "" && (staff.LinkedInURL == nil || *staff.LinkedInURL == "") { staff.LinkedInURL = &data.LinkedInURL updated = true } if data.PersonalWebsite != "" && (staff.PersonalWebsite == nil || *staff.PersonalWebsite == "") { staff.PersonalWebsite = &data.PersonalWebsite updated = true } if data.PhotoURL != "" && (staff.PhotoURL == nil || *staff.PhotoURL == "") { staff.PhotoURL = &data.PhotoURL updated = true } // Try to resolve supervisor by name if data.SupervisorName != "" && staff.SupervisorID == nil { // Search for supervisor in same university supervisorParams := database.StaffSearchParams{ Query: data.SupervisorName, UniversityID: &staff.UniversityID, Limit: 1, } result, err := h.repo.SearchStaff(c.Request.Context(), supervisorParams) if err == nil && len(result.Staff) > 0 { staff.SupervisorID = &result.Staff[0].ID updated = true } } // Update last verified timestamp now := time.Now() staff.LastVerified = &now if updated { err = h.repo.CreateStaff(c.Request.Context(), staff) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update: " + err.Error()}) return } } c.JSON(http.StatusOK, gin.H{ "status": "success", "updated": updated, "staff_id": staff.ID, }) } // RegisterAIExtractionRoutes registers AI extraction routes func (h *AIExtractionHandlers) RegisterRoutes(r *gin.RouterGroup) { ai := r.Group("/ai/extraction") // Profile extraction endpoints ai.GET("/pending", h.GetPendingProfiles) ai.POST("/submit", h.SubmitExtractedData) ai.POST("/submit-batch", h.SubmitBatchExtractedData) // Institute hierarchy endpoints ai.GET("/institutes", h.GetInstitutePages) ai.POST("/institutes/submit", h.SubmitInstituteHierarchy) }