All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefuegt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
425 lines
13 KiB
Go
425 lines
13 KiB
Go
// Package orchestrator implements multi-phase university crawling with queue management
|
|
package orchestrator
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// Audience represents a target audience filter configuration
|
|
type Audience struct {
|
|
ID uuid.UUID `json:"id"`
|
|
Name string `json:"name"`
|
|
Description string `json:"description,omitempty"`
|
|
Filters AudienceFilters `json:"filters"`
|
|
MemberCount int `json:"member_count"`
|
|
LastCountUpdate *time.Time `json:"last_count_update,omitempty"`
|
|
CreatedBy string `json:"created_by,omitempty"`
|
|
IsActive bool `json:"is_active"`
|
|
CreatedAt time.Time `json:"created_at"`
|
|
UpdatedAt time.Time `json:"updated_at"`
|
|
}
|
|
|
|
// AudienceFilters defines the filter criteria for an audience
|
|
type AudienceFilters struct {
|
|
PositionTypes []string `json:"position_types,omitempty"` // professor, researcher, lecturer
|
|
SubjectAreas []uuid.UUID `json:"subject_areas,omitempty"` // Subject area UUIDs
|
|
States []string `json:"states,omitempty"` // BW, BY, etc.
|
|
UniTypes []string `json:"uni_types,omitempty"` // UNI, PH, HAW
|
|
Universities []uuid.UUID `json:"universities,omitempty"` // University UUIDs
|
|
HasEmail *bool `json:"has_email,omitempty"`
|
|
IsActive *bool `json:"is_active,omitempty"`
|
|
Keywords []string `json:"keywords,omitempty"` // Keywords in name/research
|
|
}
|
|
|
|
// AudienceExport tracks exports of audience data
|
|
type AudienceExport struct {
|
|
ID uuid.UUID `json:"id"`
|
|
AudienceID uuid.UUID `json:"audience_id"`
|
|
ExportType string `json:"export_type"` // csv, json, email_list
|
|
RecordCount int `json:"record_count"`
|
|
FilePath string `json:"file_path,omitempty"`
|
|
ExportedBy string `json:"exported_by,omitempty"`
|
|
Purpose string `json:"purpose,omitempty"`
|
|
CreatedAt time.Time `json:"created_at"`
|
|
}
|
|
|
|
// AudienceMember represents a staff member in an audience preview
|
|
type AudienceMember struct {
|
|
ID uuid.UUID `json:"id"`
|
|
Name string `json:"name"`
|
|
Email string `json:"email,omitempty"`
|
|
Position string `json:"position,omitempty"`
|
|
University string `json:"university"`
|
|
Department string `json:"department,omitempty"`
|
|
SubjectArea string `json:"subject_area,omitempty"`
|
|
PublicationCount int `json:"publication_count"`
|
|
}
|
|
|
|
// AudienceRepository extends Repository with audience operations
|
|
type AudienceRepository interface {
|
|
// Audience CRUD
|
|
CreateAudience(ctx context.Context, audience *Audience) error
|
|
GetAudience(ctx context.Context, id uuid.UUID) (*Audience, error)
|
|
ListAudiences(ctx context.Context, activeOnly bool) ([]Audience, error)
|
|
UpdateAudience(ctx context.Context, audience *Audience) error
|
|
DeleteAudience(ctx context.Context, id uuid.UUID) error
|
|
|
|
// Audience members
|
|
GetAudienceMembers(ctx context.Context, id uuid.UUID, limit, offset int) ([]AudienceMember, int, error)
|
|
UpdateAudienceCount(ctx context.Context, id uuid.UUID) (int, error)
|
|
|
|
// Exports
|
|
CreateExport(ctx context.Context, export *AudienceExport) error
|
|
ListExports(ctx context.Context, audienceID uuid.UUID) ([]AudienceExport, error)
|
|
}
|
|
|
|
// ============================================================================
|
|
// POSTGRES IMPLEMENTATION
|
|
// ============================================================================
|
|
|
|
// CreateAudience creates a new audience
|
|
func (r *PostgresRepository) CreateAudience(ctx context.Context, audience *Audience) error {
|
|
filtersJSON, err := json.Marshal(audience.Filters)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal filters: %w", err)
|
|
}
|
|
|
|
query := `
|
|
INSERT INTO audiences (name, description, filters, created_by, is_active)
|
|
VALUES ($1, $2, $3, $4, $5)
|
|
RETURNING id, member_count, created_at, updated_at
|
|
`
|
|
|
|
return r.pool.QueryRow(ctx, query,
|
|
audience.Name,
|
|
audience.Description,
|
|
filtersJSON,
|
|
audience.CreatedBy,
|
|
audience.IsActive,
|
|
).Scan(&audience.ID, &audience.MemberCount, &audience.CreatedAt, &audience.UpdatedAt)
|
|
}
|
|
|
|
// GetAudience retrieves an audience by ID
|
|
func (r *PostgresRepository) GetAudience(ctx context.Context, id uuid.UUID) (*Audience, error) {
|
|
query := `
|
|
SELECT id, name, description, filters, member_count, last_count_update,
|
|
created_by, is_active, created_at, updated_at
|
|
FROM audiences
|
|
WHERE id = $1
|
|
`
|
|
|
|
var audience Audience
|
|
var filtersJSON []byte
|
|
|
|
err := r.pool.QueryRow(ctx, query, id).Scan(
|
|
&audience.ID, &audience.Name, &audience.Description, &filtersJSON,
|
|
&audience.MemberCount, &audience.LastCountUpdate,
|
|
&audience.CreatedBy, &audience.IsActive,
|
|
&audience.CreatedAt, &audience.UpdatedAt,
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := json.Unmarshal(filtersJSON, &audience.Filters); err != nil {
|
|
return nil, fmt.Errorf("failed to unmarshal filters: %w", err)
|
|
}
|
|
|
|
return &audience, nil
|
|
}
|
|
|
|
// ListAudiences lists all audiences
|
|
func (r *PostgresRepository) ListAudiences(ctx context.Context, activeOnly bool) ([]Audience, error) {
|
|
query := `
|
|
SELECT id, name, description, filters, member_count, last_count_update,
|
|
created_by, is_active, created_at, updated_at
|
|
FROM audiences
|
|
`
|
|
if activeOnly {
|
|
query += ` WHERE is_active = TRUE`
|
|
}
|
|
query += ` ORDER BY created_at DESC`
|
|
|
|
rows, err := r.pool.Query(ctx, query)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to query audiences: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
var audiences []Audience
|
|
for rows.Next() {
|
|
var audience Audience
|
|
var filtersJSON []byte
|
|
|
|
if err := rows.Scan(
|
|
&audience.ID, &audience.Name, &audience.Description, &filtersJSON,
|
|
&audience.MemberCount, &audience.LastCountUpdate,
|
|
&audience.CreatedBy, &audience.IsActive,
|
|
&audience.CreatedAt, &audience.UpdatedAt,
|
|
); err != nil {
|
|
return nil, fmt.Errorf("failed to scan audience: %w", err)
|
|
}
|
|
|
|
if err := json.Unmarshal(filtersJSON, &audience.Filters); err != nil {
|
|
return nil, fmt.Errorf("failed to unmarshal filters: %w", err)
|
|
}
|
|
|
|
audiences = append(audiences, audience)
|
|
}
|
|
|
|
return audiences, rows.Err()
|
|
}
|
|
|
|
// UpdateAudience updates an existing audience
|
|
func (r *PostgresRepository) UpdateAudience(ctx context.Context, audience *Audience) error {
|
|
filtersJSON, err := json.Marshal(audience.Filters)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal filters: %w", err)
|
|
}
|
|
|
|
query := `
|
|
UPDATE audiences
|
|
SET name = $2, description = $3, filters = $4, is_active = $5, updated_at = NOW()
|
|
WHERE id = $1
|
|
RETURNING updated_at
|
|
`
|
|
|
|
return r.pool.QueryRow(ctx, query,
|
|
audience.ID,
|
|
audience.Name,
|
|
audience.Description,
|
|
filtersJSON,
|
|
audience.IsActive,
|
|
).Scan(&audience.UpdatedAt)
|
|
}
|
|
|
|
// DeleteAudience soft-deletes an audience (sets is_active = false)
|
|
func (r *PostgresRepository) DeleteAudience(ctx context.Context, id uuid.UUID) error {
|
|
query := `UPDATE audiences SET is_active = FALSE, updated_at = NOW() WHERE id = $1`
|
|
_, err := r.pool.Exec(ctx, query, id)
|
|
return err
|
|
}
|
|
|
|
// GetAudienceMembers retrieves members matching the audience filters
|
|
func (r *PostgresRepository) GetAudienceMembers(ctx context.Context, id uuid.UUID, limit, offset int) ([]AudienceMember, int, error) {
|
|
// First get the audience filters
|
|
audience, err := r.GetAudience(ctx, id)
|
|
if err != nil {
|
|
return nil, 0, fmt.Errorf("failed to get audience: %w", err)
|
|
}
|
|
|
|
// Build dynamic query based on filters
|
|
query, args := r.buildAudienceMemberQuery(audience.Filters, limit, offset, false)
|
|
countQuery, countArgs := r.buildAudienceMemberQuery(audience.Filters, 0, 0, true)
|
|
|
|
// Get total count
|
|
var totalCount int
|
|
if err := r.pool.QueryRow(ctx, countQuery, countArgs...).Scan(&totalCount); err != nil {
|
|
return nil, 0, fmt.Errorf("failed to count members: %w", err)
|
|
}
|
|
|
|
// Get members
|
|
rows, err := r.pool.Query(ctx, query, args...)
|
|
if err != nil {
|
|
return nil, 0, fmt.Errorf("failed to query members: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
var members []AudienceMember
|
|
for rows.Next() {
|
|
var m AudienceMember
|
|
if err := rows.Scan(
|
|
&m.ID, &m.Name, &m.Email, &m.Position,
|
|
&m.University, &m.Department, &m.SubjectArea, &m.PublicationCount,
|
|
); err != nil {
|
|
return nil, 0, fmt.Errorf("failed to scan member: %w", err)
|
|
}
|
|
members = append(members, m)
|
|
}
|
|
|
|
return members, totalCount, rows.Err()
|
|
}
|
|
|
|
// buildAudienceMemberQuery constructs a SQL query for audience members
|
|
func (r *PostgresRepository) buildAudienceMemberQuery(filters AudienceFilters, limit, offset int, countOnly bool) (string, []interface{}) {
|
|
var args []interface{}
|
|
argNum := 1
|
|
|
|
var selectClause string
|
|
if countOnly {
|
|
selectClause = "SELECT COUNT(*)"
|
|
} else {
|
|
selectClause = `
|
|
SELECT
|
|
s.id,
|
|
COALESCE(s.title || ' ', '') || s.first_name || ' ' || s.last_name as name,
|
|
COALESCE(s.email, '') as email,
|
|
COALESCE(s.position_type, '') as position,
|
|
u.name as university,
|
|
COALESCE(d.name, '') as department,
|
|
COALESCE(sa.name, '') as subject_area,
|
|
(SELECT COUNT(*) FROM staff_publications sp WHERE sp.staff_id = s.id) as publication_count
|
|
`
|
|
}
|
|
|
|
query := selectClause + `
|
|
FROM university_staff s
|
|
JOIN universities u ON s.university_id = u.id
|
|
LEFT JOIN departments d ON s.department_id = d.id
|
|
LEFT JOIN subject_areas sa ON s.subject_area_id = sa.id
|
|
WHERE 1=1
|
|
`
|
|
|
|
// Position types filter
|
|
if len(filters.PositionTypes) > 0 {
|
|
query += fmt.Sprintf(" AND s.position_type = ANY($%d)", argNum)
|
|
args = append(args, filters.PositionTypes)
|
|
argNum++
|
|
}
|
|
|
|
// Subject areas filter
|
|
if len(filters.SubjectAreas) > 0 {
|
|
query += fmt.Sprintf(" AND s.subject_area_id = ANY($%d)", argNum)
|
|
args = append(args, filters.SubjectAreas)
|
|
argNum++
|
|
}
|
|
|
|
// States filter
|
|
if len(filters.States) > 0 {
|
|
query += fmt.Sprintf(" AND u.state = ANY($%d)", argNum)
|
|
args = append(args, filters.States)
|
|
argNum++
|
|
}
|
|
|
|
// Uni types filter
|
|
if len(filters.UniTypes) > 0 {
|
|
query += fmt.Sprintf(" AND u.uni_type = ANY($%d)", argNum)
|
|
args = append(args, filters.UniTypes)
|
|
argNum++
|
|
}
|
|
|
|
// Universities filter
|
|
if len(filters.Universities) > 0 {
|
|
query += fmt.Sprintf(" AND s.university_id = ANY($%d)", argNum)
|
|
args = append(args, filters.Universities)
|
|
argNum++
|
|
}
|
|
|
|
// Has email filter
|
|
if filters.HasEmail != nil && *filters.HasEmail {
|
|
query += " AND s.email IS NOT NULL AND s.email != ''"
|
|
}
|
|
|
|
// Is active filter
|
|
if filters.IsActive != nil && *filters.IsActive {
|
|
query += " AND s.is_active = TRUE"
|
|
}
|
|
|
|
// Keywords filter (search in name and research_areas)
|
|
if len(filters.Keywords) > 0 {
|
|
for _, keyword := range filters.Keywords {
|
|
query += fmt.Sprintf(" AND (s.first_name ILIKE $%d OR s.last_name ILIKE $%d OR s.research_areas ILIKE $%d)", argNum, argNum, argNum)
|
|
args = append(args, "%"+keyword+"%")
|
|
argNum++
|
|
}
|
|
}
|
|
|
|
if !countOnly {
|
|
query += " ORDER BY s.last_name, s.first_name"
|
|
|
|
if limit > 0 {
|
|
query += fmt.Sprintf(" LIMIT $%d", argNum)
|
|
args = append(args, limit)
|
|
argNum++
|
|
}
|
|
|
|
if offset > 0 {
|
|
query += fmt.Sprintf(" OFFSET $%d", argNum)
|
|
args = append(args, offset)
|
|
}
|
|
}
|
|
|
|
return query, args
|
|
}
|
|
|
|
// UpdateAudienceCount updates the cached member count for an audience
|
|
func (r *PostgresRepository) UpdateAudienceCount(ctx context.Context, id uuid.UUID) (int, error) {
|
|
// Get the audience filters
|
|
audience, err := r.GetAudience(ctx, id)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("failed to get audience: %w", err)
|
|
}
|
|
|
|
// Count members
|
|
countQuery, countArgs := r.buildAudienceMemberQuery(audience.Filters, 0, 0, true)
|
|
var count int
|
|
if err := r.pool.QueryRow(ctx, countQuery, countArgs...).Scan(&count); err != nil {
|
|
return 0, fmt.Errorf("failed to count members: %w", err)
|
|
}
|
|
|
|
// Update the cached count
|
|
updateQuery := `
|
|
UPDATE audiences
|
|
SET member_count = $2, last_count_update = NOW(), updated_at = NOW()
|
|
WHERE id = $1
|
|
`
|
|
if _, err := r.pool.Exec(ctx, updateQuery, id, count); err != nil {
|
|
return 0, fmt.Errorf("failed to update count: %w", err)
|
|
}
|
|
|
|
return count, nil
|
|
}
|
|
|
|
// CreateExport creates a new export record
|
|
func (r *PostgresRepository) CreateExport(ctx context.Context, export *AudienceExport) error {
|
|
query := `
|
|
INSERT INTO audience_exports (audience_id, export_type, record_count, file_path, exported_by, purpose)
|
|
VALUES ($1, $2, $3, $4, $5, $6)
|
|
RETURNING id, created_at
|
|
`
|
|
|
|
return r.pool.QueryRow(ctx, query,
|
|
export.AudienceID,
|
|
export.ExportType,
|
|
export.RecordCount,
|
|
export.FilePath,
|
|
export.ExportedBy,
|
|
export.Purpose,
|
|
).Scan(&export.ID, &export.CreatedAt)
|
|
}
|
|
|
|
// ListExports lists exports for an audience
|
|
func (r *PostgresRepository) ListExports(ctx context.Context, audienceID uuid.UUID) ([]AudienceExport, error) {
|
|
query := `
|
|
SELECT id, audience_id, export_type, record_count, file_path, exported_by, purpose, created_at
|
|
FROM audience_exports
|
|
WHERE audience_id = $1
|
|
ORDER BY created_at DESC
|
|
`
|
|
|
|
rows, err := r.pool.Query(ctx, query, audienceID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to query exports: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
var exports []AudienceExport
|
|
for rows.Next() {
|
|
var e AudienceExport
|
|
if err := rows.Scan(
|
|
&e.ID, &e.AudienceID, &e.ExportType, &e.RecordCount,
|
|
&e.FilePath, &e.ExportedBy, &e.Purpose, &e.CreatedAt,
|
|
); err != nil {
|
|
return nil, fmt.Errorf("failed to scan export: %w", err)
|
|
}
|
|
exports = append(exports, e)
|
|
}
|
|
|
|
return exports, rows.Err()
|
|
}
|