// Package orchestrator implements multi-phase university crawling with queue management package orchestrator import ( "context" "encoding/json" "fmt" "time" "github.com/google/uuid" ) // Audience represents a target audience filter configuration type Audience struct { ID uuid.UUID `json:"id"` Name string `json:"name"` Description string `json:"description,omitempty"` Filters AudienceFilters `json:"filters"` MemberCount int `json:"member_count"` LastCountUpdate *time.Time `json:"last_count_update,omitempty"` CreatedBy string `json:"created_by,omitempty"` IsActive bool `json:"is_active"` CreatedAt time.Time `json:"created_at"` UpdatedAt time.Time `json:"updated_at"` } // AudienceFilters defines the filter criteria for an audience type AudienceFilters struct { PositionTypes []string `json:"position_types,omitempty"` // professor, researcher, lecturer SubjectAreas []uuid.UUID `json:"subject_areas,omitempty"` // Subject area UUIDs States []string `json:"states,omitempty"` // BW, BY, etc. UniTypes []string `json:"uni_types,omitempty"` // UNI, PH, HAW Universities []uuid.UUID `json:"universities,omitempty"` // University UUIDs HasEmail *bool `json:"has_email,omitempty"` IsActive *bool `json:"is_active,omitempty"` Keywords []string `json:"keywords,omitempty"` // Keywords in name/research } // AudienceExport tracks exports of audience data type AudienceExport struct { ID uuid.UUID `json:"id"` AudienceID uuid.UUID `json:"audience_id"` ExportType string `json:"export_type"` // csv, json, email_list RecordCount int `json:"record_count"` FilePath string `json:"file_path,omitempty"` ExportedBy string `json:"exported_by,omitempty"` Purpose string `json:"purpose,omitempty"` CreatedAt time.Time `json:"created_at"` } // AudienceMember represents a staff member in an audience preview type AudienceMember struct { ID uuid.UUID `json:"id"` Name string `json:"name"` Email string `json:"email,omitempty"` Position string `json:"position,omitempty"` University string `json:"university"` Department string `json:"department,omitempty"` SubjectArea string `json:"subject_area,omitempty"` PublicationCount int `json:"publication_count"` } // AudienceRepository extends Repository with audience operations type AudienceRepository interface { // Audience CRUD CreateAudience(ctx context.Context, audience *Audience) error GetAudience(ctx context.Context, id uuid.UUID) (*Audience, error) ListAudiences(ctx context.Context, activeOnly bool) ([]Audience, error) UpdateAudience(ctx context.Context, audience *Audience) error DeleteAudience(ctx context.Context, id uuid.UUID) error // Audience members GetAudienceMembers(ctx context.Context, id uuid.UUID, limit, offset int) ([]AudienceMember, int, error) UpdateAudienceCount(ctx context.Context, id uuid.UUID) (int, error) // Exports CreateExport(ctx context.Context, export *AudienceExport) error ListExports(ctx context.Context, audienceID uuid.UUID) ([]AudienceExport, error) } // ============================================================================ // POSTGRES IMPLEMENTATION // ============================================================================ // CreateAudience creates a new audience func (r *PostgresRepository) CreateAudience(ctx context.Context, audience *Audience) error { filtersJSON, err := json.Marshal(audience.Filters) if err != nil { return fmt.Errorf("failed to marshal filters: %w", err) } query := ` INSERT INTO audiences (name, description, filters, created_by, is_active) VALUES ($1, $2, $3, $4, $5) RETURNING id, member_count, created_at, updated_at ` return r.pool.QueryRow(ctx, query, audience.Name, audience.Description, filtersJSON, audience.CreatedBy, audience.IsActive, ).Scan(&audience.ID, &audience.MemberCount, &audience.CreatedAt, &audience.UpdatedAt) } // GetAudience retrieves an audience by ID func (r *PostgresRepository) GetAudience(ctx context.Context, id uuid.UUID) (*Audience, error) { query := ` SELECT id, name, description, filters, member_count, last_count_update, created_by, is_active, created_at, updated_at FROM audiences WHERE id = $1 ` var audience Audience var filtersJSON []byte err := r.pool.QueryRow(ctx, query, id).Scan( &audience.ID, &audience.Name, &audience.Description, &filtersJSON, &audience.MemberCount, &audience.LastCountUpdate, &audience.CreatedBy, &audience.IsActive, &audience.CreatedAt, &audience.UpdatedAt, ) if err != nil { return nil, err } if err := json.Unmarshal(filtersJSON, &audience.Filters); err != nil { return nil, fmt.Errorf("failed to unmarshal filters: %w", err) } return &audience, nil } // ListAudiences lists all audiences func (r *PostgresRepository) ListAudiences(ctx context.Context, activeOnly bool) ([]Audience, error) { query := ` SELECT id, name, description, filters, member_count, last_count_update, created_by, is_active, created_at, updated_at FROM audiences ` if activeOnly { query += ` WHERE is_active = TRUE` } query += ` ORDER BY created_at DESC` rows, err := r.pool.Query(ctx, query) if err != nil { return nil, fmt.Errorf("failed to query audiences: %w", err) } defer rows.Close() var audiences []Audience for rows.Next() { var audience Audience var filtersJSON []byte if err := rows.Scan( &audience.ID, &audience.Name, &audience.Description, &filtersJSON, &audience.MemberCount, &audience.LastCountUpdate, &audience.CreatedBy, &audience.IsActive, &audience.CreatedAt, &audience.UpdatedAt, ); err != nil { return nil, fmt.Errorf("failed to scan audience: %w", err) } if err := json.Unmarshal(filtersJSON, &audience.Filters); err != nil { return nil, fmt.Errorf("failed to unmarshal filters: %w", err) } audiences = append(audiences, audience) } return audiences, rows.Err() } // UpdateAudience updates an existing audience func (r *PostgresRepository) UpdateAudience(ctx context.Context, audience *Audience) error { filtersJSON, err := json.Marshal(audience.Filters) if err != nil { return fmt.Errorf("failed to marshal filters: %w", err) } query := ` UPDATE audiences SET name = $2, description = $3, filters = $4, is_active = $5, updated_at = NOW() WHERE id = $1 RETURNING updated_at ` return r.pool.QueryRow(ctx, query, audience.ID, audience.Name, audience.Description, filtersJSON, audience.IsActive, ).Scan(&audience.UpdatedAt) } // DeleteAudience soft-deletes an audience (sets is_active = false) func (r *PostgresRepository) DeleteAudience(ctx context.Context, id uuid.UUID) error { query := `UPDATE audiences SET is_active = FALSE, updated_at = NOW() WHERE id = $1` _, err := r.pool.Exec(ctx, query, id) return err } // GetAudienceMembers retrieves members matching the audience filters func (r *PostgresRepository) GetAudienceMembers(ctx context.Context, id uuid.UUID, limit, offset int) ([]AudienceMember, int, error) { // First get the audience filters audience, err := r.GetAudience(ctx, id) if err != nil { return nil, 0, fmt.Errorf("failed to get audience: %w", err) } // Build dynamic query based on filters query, args := r.buildAudienceMemberQuery(audience.Filters, limit, offset, false) countQuery, countArgs := r.buildAudienceMemberQuery(audience.Filters, 0, 0, true) // Get total count var totalCount int if err := r.pool.QueryRow(ctx, countQuery, countArgs...).Scan(&totalCount); err != nil { return nil, 0, fmt.Errorf("failed to count members: %w", err) } // Get members rows, err := r.pool.Query(ctx, query, args...) if err != nil { return nil, 0, fmt.Errorf("failed to query members: %w", err) } defer rows.Close() var members []AudienceMember for rows.Next() { var m AudienceMember if err := rows.Scan( &m.ID, &m.Name, &m.Email, &m.Position, &m.University, &m.Department, &m.SubjectArea, &m.PublicationCount, ); err != nil { return nil, 0, fmt.Errorf("failed to scan member: %w", err) } members = append(members, m) } return members, totalCount, rows.Err() } // buildAudienceMemberQuery constructs a SQL query for audience members func (r *PostgresRepository) buildAudienceMemberQuery(filters AudienceFilters, limit, offset int, countOnly bool) (string, []interface{}) { var args []interface{} argNum := 1 var selectClause string if countOnly { selectClause = "SELECT COUNT(*)" } else { selectClause = ` SELECT s.id, COALESCE(s.title || ' ', '') || s.first_name || ' ' || s.last_name as name, COALESCE(s.email, '') as email, COALESCE(s.position_type, '') as position, u.name as university, COALESCE(d.name, '') as department, COALESCE(sa.name, '') as subject_area, (SELECT COUNT(*) FROM staff_publications sp WHERE sp.staff_id = s.id) as publication_count ` } query := selectClause + ` FROM university_staff s JOIN universities u ON s.university_id = u.id LEFT JOIN departments d ON s.department_id = d.id LEFT JOIN subject_areas sa ON s.subject_area_id = sa.id WHERE 1=1 ` // Position types filter if len(filters.PositionTypes) > 0 { query += fmt.Sprintf(" AND s.position_type = ANY($%d)", argNum) args = append(args, filters.PositionTypes) argNum++ } // Subject areas filter if len(filters.SubjectAreas) > 0 { query += fmt.Sprintf(" AND s.subject_area_id = ANY($%d)", argNum) args = append(args, filters.SubjectAreas) argNum++ } // States filter if len(filters.States) > 0 { query += fmt.Sprintf(" AND u.state = ANY($%d)", argNum) args = append(args, filters.States) argNum++ } // Uni types filter if len(filters.UniTypes) > 0 { query += fmt.Sprintf(" AND u.uni_type = ANY($%d)", argNum) args = append(args, filters.UniTypes) argNum++ } // Universities filter if len(filters.Universities) > 0 { query += fmt.Sprintf(" AND s.university_id = ANY($%d)", argNum) args = append(args, filters.Universities) argNum++ } // Has email filter if filters.HasEmail != nil && *filters.HasEmail { query += " AND s.email IS NOT NULL AND s.email != ''" } // Is active filter if filters.IsActive != nil && *filters.IsActive { query += " AND s.is_active = TRUE" } // Keywords filter (search in name and research_areas) if len(filters.Keywords) > 0 { for _, keyword := range filters.Keywords { query += fmt.Sprintf(" AND (s.first_name ILIKE $%d OR s.last_name ILIKE $%d OR s.research_areas ILIKE $%d)", argNum, argNum, argNum) args = append(args, "%"+keyword+"%") argNum++ } } if !countOnly { query += " ORDER BY s.last_name, s.first_name" if limit > 0 { query += fmt.Sprintf(" LIMIT $%d", argNum) args = append(args, limit) argNum++ } if offset > 0 { query += fmt.Sprintf(" OFFSET $%d", argNum) args = append(args, offset) } } return query, args } // UpdateAudienceCount updates the cached member count for an audience func (r *PostgresRepository) UpdateAudienceCount(ctx context.Context, id uuid.UUID) (int, error) { // Get the audience filters audience, err := r.GetAudience(ctx, id) if err != nil { return 0, fmt.Errorf("failed to get audience: %w", err) } // Count members countQuery, countArgs := r.buildAudienceMemberQuery(audience.Filters, 0, 0, true) var count int if err := r.pool.QueryRow(ctx, countQuery, countArgs...).Scan(&count); err != nil { return 0, fmt.Errorf("failed to count members: %w", err) } // Update the cached count updateQuery := ` UPDATE audiences SET member_count = $2, last_count_update = NOW(), updated_at = NOW() WHERE id = $1 ` if _, err := r.pool.Exec(ctx, updateQuery, id, count); err != nil { return 0, fmt.Errorf("failed to update count: %w", err) } return count, nil } // CreateExport creates a new export record func (r *PostgresRepository) CreateExport(ctx context.Context, export *AudienceExport) error { query := ` INSERT INTO audience_exports (audience_id, export_type, record_count, file_path, exported_by, purpose) VALUES ($1, $2, $3, $4, $5, $6) RETURNING id, created_at ` return r.pool.QueryRow(ctx, query, export.AudienceID, export.ExportType, export.RecordCount, export.FilePath, export.ExportedBy, export.Purpose, ).Scan(&export.ID, &export.CreatedAt) } // ListExports lists exports for an audience func (r *PostgresRepository) ListExports(ctx context.Context, audienceID uuid.UUID) ([]AudienceExport, error) { query := ` SELECT id, audience_id, export_type, record_count, file_path, exported_by, purpose, created_at FROM audience_exports WHERE audience_id = $1 ORDER BY created_at DESC ` rows, err := r.pool.Query(ctx, query, audienceID) if err != nil { return nil, fmt.Errorf("failed to query exports: %w", err) } defer rows.Close() var exports []AudienceExport for rows.Next() { var e AudienceExport if err := rows.Scan( &e.ID, &e.AudienceID, &e.ExportType, &e.RecordCount, &e.FilePath, &e.ExportedBy, &e.Purpose, &e.CreatedAt, ); err != nil { return nil, fmt.Errorf("failed to scan export: %w", err) } exports = append(exports, e) } return exports, rows.Err() }