package handlers

import (
	"net/http"
	"time"

	"github.com/gin-gonic/gin"
	"github.com/google/uuid"

	"github.com/breakpilot/edu-search-service/internal/database"
)

// SubmitBatchExtractedData saves multiple AI-extracted profile data items.
// POST /api/v1/ai/extraction/submit-batch
//
// The request body has the shape {"items": [...]}. Items are processed
// independently: a failure on one item is recorded in that item's result
// entry and does not abort the rest of the batch. A value from an item is
// applied only when the item supplies a non-empty value and the stored record
// has none, so existing data is never overwritten.
//
// Once the body has been parsed the response status is always 200; callers
// must inspect "results", "success_count" and "error_count" to detect
// per-item failures.
func (h *AIExtractionHandlers) SubmitBatchExtractedData(c *gin.Context) {
	var batch struct {
		Items []ExtractedProfileData `json:"items" binding:"required"`
	}
	if err := c.ShouldBindJSON(&batch); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request: " + err.Error()})
		return
	}

	ctx := c.Request.Context()
	results := make([]gin.H, 0, len(batch.Items))
	successCount := 0
	errorCount := 0

	// Index into the slice instead of ranging by value: the pointers stored on
	// the staff record below then refer to this item's own fields rather than
	// to a loop variable that is shared between iterations on older Go
	// versions.
	for i := range batch.Items {
		item := &batch.Items[i]

		// Get existing staff record. Any lookup error is reported to the
		// client as "Staff not found".
		staff, err := h.repo.GetStaff(ctx, item.StaffID)
		if err != nil {
			results = append(results, gin.H{
				"staff_id": item.StaffID,
				"status":   "error",
				"error":    "Staff not found",
			})
			errorCount++
			continue
		}

		// Apply updates (same logic as single submit): fill gaps only.
		updated := false
		if item.Email != "" && (staff.Email == nil || *staff.Email == "") {
			staff.Email = &item.Email
			updated = true
		}
		if item.Phone != "" && (staff.Phone == nil || *staff.Phone == "") {
			staff.Phone = &item.Phone
			updated = true
		}
		if item.Office != "" && (staff.Office == nil || *staff.Office == "") {
			staff.Office = &item.Office
			updated = true
		}
		if item.Position != "" && (staff.Position == nil || *staff.Position == "") {
			staff.Position = &item.Position
			updated = true
		}
		if item.PositionType != "" && (staff.PositionType == nil || *staff.PositionType == "") {
			staff.PositionType = &item.PositionType
			updated = true
		}
		if item.TeamRole != "" && (staff.TeamRole == nil || *staff.TeamRole == "") {
			staff.TeamRole = &item.TeamRole
			updated = true
		}
		if len(item.ResearchInterests) > 0 && len(staff.ResearchInterests) == 0 {
			staff.ResearchInterests = item.ResearchInterests
			updated = true
		}
		if item.ORCID != "" && (staff.ORCID == nil || *staff.ORCID == "") {
			staff.ORCID = &item.ORCID
			updated = true
		}

		// Update last verified.
		// NOTE(review): the timestamp is only persisted when some other field
		// changed, because the save below is guarded by `updated` — confirm
		// that this is the intended behavior.
		now := time.Now()
		staff.LastVerified = &now

		if updated {
			// NOTE(review): CreateStaff is called on a record that already
			// exists; presumably it behaves as an upsert — verify against the
			// repository implementation.
			if err := h.repo.CreateStaff(ctx, staff); err != nil {
				results = append(results, gin.H{
					"staff_id": item.StaffID,
					"status":   "error",
					"error":    err.Error(),
				})
				errorCount++
				continue
			}
		}

		results = append(results, gin.H{
			"staff_id": item.StaffID,
			"status":   "success",
			"updated":  updated,
		})
		successCount++
	}

	c.JSON(http.StatusOK, gin.H{
		"results":       results,
		"success_count": successCount,
		"error_count":   errorCount,
		"total":         len(batch.Items),
	})
}

// InstituteHierarchyTask represents an institute page to crawl for hierarchy.
// GetInstitutePages fills InstituteURL and UniversityID only; InstituteName is
// left empty there and is therefore omitted from that endpoint's JSON.
type InstituteHierarchyTask struct {
	InstituteURL  string    `json:"institute_url"`
	InstituteName string    `json:"institute_name,omitempty"`
	UniversityID  uuid.UUID `json:"university_id"`
}

// GetInstitutePages returns institute pages that need hierarchy crawling.
// GET /api/v1/ai/extraction/institutes?university_id=...
//
// The optional university_id query parameter restricts the result to one
// university. A value that is not a valid UUID is rejected with 400 rather
// than being ignored, because ignoring it would silently return pages for
// every university. A repository failure yields 500.
//
// The response contains one task per distinct, non-empty staff source URL in
// first-seen order; "institutes" is always a JSON array, never null.
func (h *AIExtractionHandlers) GetInstitutePages(c *gin.Context) {
	// Upper bound on the number of staff rows scanned for source URLs.
	const staffScanLimit = 1000

	var universityID *uuid.UUID
	if raw := c.Query("university_id"); raw != "" {
		id, err := uuid.Parse(raw)
		if err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid university_id: " + err.Error()})
			return
		}
		universityID = &id
	}

	// Get unique institute/department URLs from staff profiles.
	params := database.StaffSearchParams{
		UniversityID: universityID,
		Limit:        staffScanLimit,
	}
	result, err := h.repo.SearchStaff(c.Request.Context(), params)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	// Collect unique source URLs (these are typically department pages).
	// tasks starts as an empty, non-nil slice so that it encodes as [] when
	// nothing is found.
	seen := make(map[string]struct{})
	tasks := make([]InstituteHierarchyTask, 0)
	for i := range result.Staff {
		staff := &result.Staff[i]
		if staff.SourceURL == nil || *staff.SourceURL == "" {
			continue
		}
		sourceURL := *staff.SourceURL
		if _, dup := seen[sourceURL]; dup {
			continue
		}
		seen[sourceURL] = struct{}{}
		tasks = append(tasks, InstituteHierarchyTask{
			InstituteURL: sourceURL,
			UniversityID: staff.UniversityID,
		})
	}

	c.JSON(http.StatusOK, gin.H{
		"institutes": tasks,
		"total":      len(tasks),
	})
}

// InstituteHierarchyData represents hierarchy data extracted from an institute
// page. InstituteURL and UniversityID are mandatory in the request body; all
// other fields are optional.
type InstituteHierarchyData struct {
	InstituteURL  string    `json:"institute_url" binding:"required"`
	UniversityID  uuid.UUID `json:"university_id" binding:"required"`
	InstituteName string    `json:"institute_name,omitempty"`

	// Leadership
	LeaderName  string `json:"leader_name,omitempty"`
	LeaderTitle string `json:"leader_title,omitempty"` // e.g., "Professor", "Lehrstuhlinhaber"

	// Staff organization
	StaffGroups []struct {
		Role    string   `json:"role"`    // e.g., "Leitung", "Wissenschaftliche Mitarbeiter", "Sekretariat"
		Members []string `json:"members"` // Names of people in this group
	} `json:"staff_groups,omitempty"`

	// Teaching info (Lehrveranstaltungen). Accepted in the payload but not
	// read by SubmitInstituteHierarchy.
	TeachingCourses []struct {
		Title   string `json:"title"`
		Teacher string `json:"teacher,omitempty"`
	} `json:"teaching_courses,omitempty"`
}

// SubmitInstituteHierarchy saves hierarchy data from an institute page.
// POST /api/v1/ai/extraction/institutes/submit
//
// The handler creates a department for the institute, then links staff to it
// on a best-effort basis:
//
//   - the leader (if leader_name is given and a staff search finds a match) is
//     assigned to the department, given the team role "leitung", flagged as
//     professor and, when leader_title is set, given that academic title;
//   - every name listed in staff_groups that resolves to a staff record is
//     assigned to the department with the group's role, and gets the leader as
//     supervisor unless the record is the leader itself.
//
// Names that cannot be resolved are skipped. Search or save failures for an
// individual person do not fail the request; they are attached to the gin
// context via c.Error so that error-handling middleware can log them, and a
// member whose save failed is not counted in "members_updated".
//
// Responds 400 on an invalid body, 500 if the department cannot be created,
// and 200 otherwise.
func (h *AIExtractionHandlers) SubmitInstituteHierarchy(c *gin.Context) {
	var data InstituteHierarchyData
	if err := c.ShouldBindJSON(&data); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request: " + err.Error()})
		return
	}

	ctx := c.Request.Context()

	// Find or create department.
	// NOTE(review): the code below uses dept.ID after this call, so
	// CreateDepartment is assumed to populate it (and presumably to reuse an
	// existing row) — verify against the repository implementation.
	dept := &database.Department{
		UniversityID: data.UniversityID,
		Name:         data.InstituteName,
	}
	if data.InstituteURL != "" {
		dept.URL = &data.InstituteURL
	}
	if err := h.repo.CreateDepartment(ctx, dept); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create department: " + err.Error()})
		return
	}

	// Find leader and set as supervisor for all staff in this institute.
	var leaderID *uuid.UUID
	if data.LeaderName != "" {
		// The first search hit for the name is taken as the leader.
		result, err := h.repo.SearchStaff(ctx, database.StaffSearchParams{
			Query:        data.LeaderName,
			UniversityID: &data.UniversityID,
			Limit:        1,
		})
		switch {
		case err != nil:
			// Best effort: continue without a leader, but keep the error
			// visible to middleware.
			_ = c.Error(err)
		case len(result.Staff) > 0:
			leader := &result.Staff[0]
			leaderID = &leader.ID

			// Update leader with department and role.
			roleLeitung := "leitung"
			leader.DepartmentID = &dept.ID
			leader.TeamRole = &roleLeitung
			leader.IsProfessor = true
			if data.LeaderTitle != "" {
				leader.AcademicTitle = &data.LeaderTitle
			}
			// The leader record exists even if this save fails, so leaderID
			// stays usable as a supervisor reference.
			if err := h.repo.CreateStaff(ctx, leader); err != nil {
				_ = c.Error(err)
			}
		}
	}

	// Process staff groups.
	updatedCount := 0
	for i := range data.StaffGroups {
		group := &data.StaffGroups[i]
		for _, memberName := range group.Members {
			// Find staff member; the first search hit is used.
			result, err := h.repo.SearchStaff(ctx, database.StaffSearchParams{
				Query:        memberName,
				UniversityID: &data.UniversityID,
				Limit:        1,
			})
			if err != nil {
				_ = c.Error(err)
				continue
			}
			if len(result.Staff) == 0 {
				continue
			}

			member := &result.Staff[0]
			member.DepartmentID = &dept.ID
			member.TeamRole = &group.Role

			// Set supervisor if leader was found and this is not the leader.
			if leaderID != nil && member.ID != *leaderID {
				member.SupervisorID = leaderID
			}

			// Count only members that were actually persisted.
			if err := h.repo.CreateStaff(ctx, member); err != nil {
				_ = c.Error(err)
				continue
			}
			updatedCount++
		}
	}

	c.JSON(http.StatusOK, gin.H{
		"status":          "success",
		"department_id":   dept.ID,
		"leader_id":       leaderID,
		"members_updated": updatedCount,
	})
}