package database

import (
	"context"
	"errors"

	"github.com/google/uuid"
	"github.com/jackc/pgx/v5"
)

// Repository groups the database operations in this file (universities,
// departments, crawl status and aggregate statistics). Every method issues
// its SQL through r.db.Pool.
type Repository struct {
	db *DB
}

// NewRepository returns a Repository that runs its queries against db.
func NewRepository(db *DB) *Repository {
	return &Repository{db: db}
}

// ============================================================================
// UNIVERSITIES
// ============================================================================

// CreateUniversity inserts u, or updates the existing row when a university
// with the same url already exists (upsert on the url conflict target).
//
// On conflict, name, short_name, state, uni_type and staff_page_pattern are
// overwritten with the values from u and updated_at is set to NOW(). On
// success u.ID, u.CreatedAt and u.UpdatedAt are filled from the RETURNING
// clause. Any query or scan error is returned unchanged.
func (r *Repository) CreateUniversity(ctx context.Context, u *University) error {
	query := `
		INSERT INTO universities (name, short_name, url, state, uni_type, staff_page_pattern)
		VALUES ($1, $2, $3, $4, $5, $6)
		ON CONFLICT (url) DO UPDATE SET
			name = EXCLUDED.name,
			short_name = EXCLUDED.short_name,
			state = EXCLUDED.state,
			uni_type = EXCLUDED.uni_type,
			staff_page_pattern = EXCLUDED.staff_page_pattern,
			updated_at = NOW()
		RETURNING id, created_at, updated_at
	`
	return r.db.Pool.QueryRow(ctx, query,
		u.Name, u.ShortName, u.URL, u.State, u.UniType, u.StaffPagePattern,
	).Scan(&u.ID, &u.CreatedAt, &u.UpdatedAt)
}

// GetUniversity retrieves a university by ID.
//
// It returns (nil, nil) when no row matches, so callers must check the
// returned pointer as well as the error. Any other error is returned
// unchanged with a nil university.
func (r *Repository) GetUniversity(ctx context.Context, id uuid.UUID) (*University, error) {
	query := `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
		FROM universities WHERE id = $1`

	u := &University{}
	err := r.db.Pool.QueryRow(ctx, query, id).Scan(
		&u.ID, &u.Name, &u.ShortName, &u.URL, &u.State, &u.UniType,
		&u.StaffPagePattern, &u.CreatedAt, &u.UpdatedAt,
	)
	// errors.Is rather than == so a wrapped ErrNoRows is still treated as
	// "not found" instead of leaking out as a hard failure.
	if errors.Is(err, pgx.ErrNoRows) {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	return u, nil
}

// GetUniversityByID is an alias for GetUniversity (for interface
// compatibility); it has the same (nil, nil) not-found behavior.
func (r *Repository) GetUniversityByID(ctx context.Context, id uuid.UUID) (*University, error) {
	return r.GetUniversity(ctx, id)
}

// GetUniversityByURL retrieves a university by URL.
//
// Unlike GetUniversity, a missing row is NOT translated to (nil, nil): the
// error produced by Scan (pgx.ErrNoRows) is returned to the caller as is.
func (r *Repository) GetUniversityByURL(ctx context.Context, url string) (*University, error) {
	query := `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
		FROM universities WHERE url = $1`

	u := &University{}
	err := r.db.Pool.QueryRow(ctx, query, url).Scan(
		&u.ID, &u.Name, &u.ShortName, &u.URL, &u.State, &u.UniType,
		&u.StaffPagePattern, &u.CreatedAt, &u.UpdatedAt,
	)
	if err != nil {
		return nil, err
	}
	return u, nil
}

// ListUniversities returns every university ordered by name.
//
// The result is a nil slice when the table is empty. A scan failure aborts
// the listing and returns a nil slice with the error; an iteration error
// reported by rows.Err() is returned alongside the rows collected so far.
func (r *Repository) ListUniversities(ctx context.Context) ([]University, error) {
	query := `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
		FROM universities ORDER BY name`

	rows, err := r.db.Pool.Query(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var universities []University
	for rows.Next() {
		var u University
		if err := rows.Scan(
			&u.ID, &u.Name, &u.ShortName, &u.URL, &u.State, &u.UniType,
			&u.StaffPagePattern, &u.CreatedAt, &u.UpdatedAt,
		); err != nil {
			return nil, err
		}
		universities = append(universities, u)
	}
	return universities, rows.Err()
}

// ============================================================================
// DEPARTMENTS
// ============================================================================

// CreateDepartment inserts d, or updates the existing row when a department
// with the same (university_id, name) pair already exists.
//
// On conflict, name_en, url, category and parent_id are overwritten with the
// values from d and updated_at is set to NOW(). On success d.ID, d.CreatedAt
// and d.UpdatedAt are filled from the RETURNING clause.
func (r *Repository) CreateDepartment(ctx context.Context, d *Department) error {
	query := `
		INSERT INTO departments (university_id, name, name_en, url, category, parent_id)
		VALUES ($1, $2, $3, $4, $5, $6)
		ON CONFLICT (university_id, name) DO UPDATE SET
			name_en = EXCLUDED.name_en,
			url = EXCLUDED.url,
			category = EXCLUDED.category,
			parent_id = EXCLUDED.parent_id,
			updated_at = NOW()
		RETURNING id, created_at, updated_at
	`
	return r.db.Pool.QueryRow(ctx, query,
		d.UniversityID, d.Name, d.NameEN, d.URL, d.Category, d.ParentID,
	).Scan(&d.ID, &d.CreatedAt, &d.UpdatedAt)
}

// GetDepartmentByName retrieves the department called name within the
// university identified by uniID.
//
// A missing row is reported through the returned error (pgx.ErrNoRows from
// Scan), not as (nil, nil).
func (r *Repository) GetDepartmentByName(ctx context.Context, uniID uuid.UUID, name string) (*Department, error) {
	query := `SELECT id, university_id, name, name_en, url, category, parent_id, created_at, updated_at
		FROM departments WHERE university_id = $1 AND name = $2`

	d := &Department{}
	err := r.db.Pool.QueryRow(ctx, query, uniID, name).Scan(
		&d.ID, &d.UniversityID, &d.Name, &d.NameEN, &d.URL, &d.Category,
		&d.ParentID, &d.CreatedAt, &d.UpdatedAt,
	)
	if err != nil {
		return nil, err
	}
	return d, nil
}

// ============================================================================
// CRAWL STATUS
// ============================================================================

// UpdateCrawlStatus inserts the crawl status for status.UniversityID, or
// overwrites the existing row for that university.
//
// On conflict every staff/publication crawl field, next_scheduled_crawl and
// crawl_priority are replaced with the values from status, and updated_at is
// set to NOW(). Nothing is written back into status.
func (r *Repository) UpdateCrawlStatus(ctx context.Context, status *UniversityCrawlStatus) error {
	query := `
		INSERT INTO university_crawl_status (
			university_id, last_staff_crawl, staff_crawl_status, staff_count, staff_errors,
			last_pub_crawl, pub_crawl_status, pub_count, pub_errors,
			next_scheduled_crawl, crawl_priority
		) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
		ON CONFLICT (university_id) DO UPDATE SET
			last_staff_crawl = EXCLUDED.last_staff_crawl,
			staff_crawl_status = EXCLUDED.staff_crawl_status,
			staff_count = EXCLUDED.staff_count,
			staff_errors = EXCLUDED.staff_errors,
			last_pub_crawl = EXCLUDED.last_pub_crawl,
			pub_crawl_status = EXCLUDED.pub_crawl_status,
			pub_count = EXCLUDED.pub_count,
			pub_errors = EXCLUDED.pub_errors,
			next_scheduled_crawl = EXCLUDED.next_scheduled_crawl,
			crawl_priority = EXCLUDED.crawl_priority,
			updated_at = NOW()
	`
	_, err := r.db.Pool.Exec(ctx, query,
		status.UniversityID, status.LastStaffCrawl, status.StaffCrawlStatus,
		status.StaffCount, status.StaffErrors,
		status.LastPubCrawl, status.PubCrawlStatus, status.PubCount, status.PubErrors,
		status.NextScheduledCrawl, status.CrawlPriority,
	)
	return err
}

// GetCrawlStatus retrieves the crawl status row for the university uniID.
//
// It returns (nil, nil) when the university has no status row yet. Any other
// error is returned unchanged with a nil status.
func (r *Repository) GetCrawlStatus(ctx context.Context, uniID uuid.UUID) (*UniversityCrawlStatus, error) {
	// NOTE(review): SELECT * couples the Scan order below to the table's
	// column order and count (13 columns expected). Consider an explicit
	// column list once the schema has been confirmed.
	query := `SELECT * FROM university_crawl_status WHERE university_id = $1`

	s := &UniversityCrawlStatus{}
	err := r.db.Pool.QueryRow(ctx, query, uniID).Scan(
		&s.UniversityID, &s.LastStaffCrawl, &s.StaffCrawlStatus, &s.StaffCount, &s.StaffErrors,
		&s.LastPubCrawl, &s.PubCrawlStatus, &s.PubCount, &s.PubErrors,
		&s.NextScheduledCrawl, &s.CrawlPriority, &s.CreatedAt, &s.UpdatedAt,
	)
	// errors.Is rather than == so a wrapped ErrNoRows still maps to "no status".
	if errors.Is(err, pgx.ErrNoRows) {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	return s, nil
}

// ============================================================================
// STATS
// ============================================================================

// GetStaffStats retrieves statistics about staff data.
//
// It fills four totals (active staff, active professors, publications,
// universities) and three breakdowns of active staff: by university state,
// by university type and by position type. NULL grouping values are reported
// under the key "unknown". The queries run one after another; the first
// failure, including an error surfaced while iterating a result set, aborts
// the call and returns (nil, err).
func (r *Repository) GetStaffStats(ctx context.Context) (*StaffStats, error) {
	stats := &StaffStats{
		ByState:        make(map[string]int),
		ByUniType:      make(map[string]int),
		ByPositionType: make(map[string]int),
	}

	// Basic counts: each query yields a single integer.
	totals := []struct {
		query string
		dest  *int
	}{
		{"SELECT COUNT(*) FROM university_staff WHERE is_active = true", &stats.TotalStaff},
		{"SELECT COUNT(*) FROM university_staff WHERE is_professor = true AND is_active = true", &stats.TotalProfessors},
		{"SELECT COUNT(*) FROM publications", &stats.TotalPublications},
		{"SELECT COUNT(*) FROM universities", &stats.TotalUniversities},
	}
	for _, t := range totals {
		if err := r.db.Pool.QueryRow(ctx, t.query).Scan(t.dest); err != nil {
			return nil, err
		}
	}

	// fillCounts runs a two-column (label, count) query and stores the result
	// in dest. The result set is closed before the next query starts.
	fillCounts := func(query string, dest map[string]int) error {
		rows, err := r.db.Pool.Query(ctx, query)
		if err != nil {
			return err
		}
		defer rows.Close()

		for rows.Next() {
			var key string
			var count int
			if err := rows.Scan(&key, &count); err != nil {
				return err
			}
			// Accumulate instead of assign: the NULL group (COALESCEd to
			// 'unknown') and a literal 'unknown' value arrive as two rows
			// with the same key and must not overwrite each other.
			dest[key] += count
		}
		// Next returning false can also mean the iteration failed; without
		// this check a partial breakdown would be returned as success.
		return rows.Err()
	}

	breakdowns := []struct {
		query string
		dest  map[string]int
	}{
		// By state
		{`
			SELECT COALESCE(u.state, 'unknown'), COUNT(*)
			FROM university_staff s
			JOIN universities u ON s.university_id = u.id
			WHERE s.is_active = true
			GROUP BY u.state
		`, stats.ByState},
		// By uni type
		{`
			SELECT COALESCE(u.uni_type, 'unknown'), COUNT(*)
			FROM university_staff s
			JOIN universities u ON s.university_id = u.id
			WHERE s.is_active = true
			GROUP BY u.uni_type
		`, stats.ByUniType},
		// By position type
		{`
			SELECT COALESCE(position_type, 'unknown'), COUNT(*)
			FROM university_staff
			WHERE is_active = true
			GROUP BY position_type
		`, stats.ByPositionType},
	}
	for _, b := range breakdowns {
		if err := fillCounts(b.query, b.dest); err != nil {
			return nil, err
		}
	}

	return stats, nil
}