Files
breakpilot-lehrer/edu-search-service/internal/database/repository.go
Benjamin Admin 9ba420fa91
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 42s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m51s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 29s
Fix: Remove broken getKlausurApiUrl and clean up empty lines
sed replacement left orphaned hostname references in story page
and empty lines in getApiBase functions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 16:02:04 +02:00

294 lines
8.9 KiB
Go

package database
import (
	"context"
	"errors"

	"github.com/google/uuid"
	"github.com/jackc/pgx/v5"
)
// Repository bundles the database operations for staff and publication data
// (universities, departments, crawl status and aggregate statistics).
type Repository struct {
	// db is the handle whose Pool executes every query issued by the repository.
	db *DB
}
// NewRepository returns a Repository that issues its queries through db.
func NewRepository(db *DB) *Repository {
	repo := new(Repository)
	repo.db = db
	return repo
}
// ============================================================================
// UNIVERSITIES
// ============================================================================
// CreateUniversity inserts u into the universities table. When a row with the
// same url already exists, its other columns are overwritten and updated_at is
// refreshed instead (upsert keyed on url).
//
// On success the row's id, created_at and updated_at are written back into u.
func (r *Repository) CreateUniversity(ctx context.Context, u *University) error {
	const upsert = `
INSERT INTO universities (name, short_name, url, state, uni_type, staff_page_pattern)
VALUES ($1, $2, $3, $4, $5, $6)
ON CONFLICT (url) DO UPDATE SET
name = EXCLUDED.name,
short_name = EXCLUDED.short_name,
state = EXCLUDED.state,
uni_type = EXCLUDED.uni_type,
staff_page_pattern = EXCLUDED.staff_page_pattern,
updated_at = NOW()
RETURNING id, created_at, updated_at
`
	row := r.db.Pool.QueryRow(ctx, upsert,
		u.Name, u.ShortName, u.URL, u.State, u.UniType, u.StaffPagePattern,
	)
	return row.Scan(&u.ID, &u.CreatedAt, &u.UpdatedAt)
}
// GetUniversity retrieves a university by ID.
//
// It returns (nil, nil) when no university with that ID exists, so callers
// must check the returned pointer as well as the error. Any other query or
// scan failure is returned unchanged.
func (r *Repository) GetUniversity(ctx context.Context, id uuid.UUID) (*University, error) {
	query := `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
FROM universities WHERE id = $1`
	u := &University{}
	err := r.db.Pool.QueryRow(ctx, query, id).Scan(
		&u.ID, &u.Name, &u.ShortName, &u.URL, &u.State, &u.UniType,
		&u.StaffPagePattern, &u.CreatedAt, &u.UpdatedAt,
	)
	// errors.Is rather than == so a wrapped ErrNoRows is still recognised as
	// "not found" instead of surfacing as a hard error.
	if errors.Is(err, pgx.ErrNoRows) {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	return u, nil
}
// GetUniversityByID forwards to GetUniversity; it exists so Repository offers
// the method under this name as well (interface compatibility).
func (r *Repository) GetUniversityByID(ctx context.Context, id uuid.UUID) (*University, error) {
	uni, err := r.GetUniversity(ctx, id)
	return uni, err
}
// GetUniversityByURL looks up the university whose url column equals url.
//
// Every error from the query or scan is returned as-is together with a nil
// university. Unlike GetUniversity there is no special case for a missing row,
// so a no-rows result reaches the caller as an error.
func (r *Repository) GetUniversityByURL(ctx context.Context, url string) (*University, error) {
	const selectByURL = `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
FROM universities WHERE url = $1`

	var uni University
	row := r.db.Pool.QueryRow(ctx, selectByURL, url)
	if err := row.Scan(
		&uni.ID, &uni.Name, &uni.ShortName, &uni.URL, &uni.State, &uni.UniType,
		&uni.StaffPagePattern, &uni.CreatedAt, &uni.UpdatedAt,
	); err != nil {
		return nil, err
	}
	return &uni, nil
}
// ListUniversities returns every row of the universities table ordered by name.
//
// A query or scan failure yields (nil, err). The error reported by the row
// iterator after the loop is returned alongside whatever was collected so far.
func (r *Repository) ListUniversities(ctx context.Context) ([]University, error) {
	const selectAll = `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
FROM universities ORDER BY name`

	rows, err := r.db.Pool.Query(ctx, selectAll)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var result []University
	for rows.Next() {
		uni := University{}
		err := rows.Scan(
			&uni.ID, &uni.Name, &uni.ShortName, &uni.URL, &uni.State, &uni.UniType,
			&uni.StaffPagePattern, &uni.CreatedAt, &uni.UpdatedAt,
		)
		if err != nil {
			return nil, err
		}
		result = append(result, uni)
	}
	err = rows.Err()
	return result, err
}
// ============================================================================
// DEPARTMENTS
// ============================================================================
// CreateDepartment inserts d into the departments table. When a department
// with the same (university_id, name) already exists, its name_en, url,
// category and parent_id are overwritten and updated_at is refreshed instead.
//
// On success the row's id, created_at and updated_at are written back into d.
func (r *Repository) CreateDepartment(ctx context.Context, d *Department) error {
	const upsert = `
INSERT INTO departments (university_id, name, name_en, url, category, parent_id)
VALUES ($1, $2, $3, $4, $5, $6)
ON CONFLICT (university_id, name) DO UPDATE SET
name_en = EXCLUDED.name_en,
url = EXCLUDED.url,
category = EXCLUDED.category,
parent_id = EXCLUDED.parent_id,
updated_at = NOW()
RETURNING id, created_at, updated_at
`
	row := r.db.Pool.QueryRow(ctx, upsert,
		d.UniversityID, d.Name, d.NameEN, d.URL, d.Category, d.ParentID,
	)
	return row.Scan(&d.ID, &d.CreatedAt, &d.UpdatedAt)
}
// GetDepartmentByName looks up the department called name within the
// university identified by uniID.
//
// Every error from the query or scan is returned as-is together with a nil
// department; there is no special case for a missing row, so a no-rows result
// reaches the caller as an error.
func (r *Repository) GetDepartmentByName(ctx context.Context, uniID uuid.UUID, name string) (*Department, error) {
	const selectByName = `SELECT id, university_id, name, name_en, url, category, parent_id, created_at, updated_at
FROM departments WHERE university_id = $1 AND name = $2`

	var dept Department
	row := r.db.Pool.QueryRow(ctx, selectByName, uniID, name)
	if err := row.Scan(
		&dept.ID, &dept.UniversityID, &dept.Name, &dept.NameEN, &dept.URL, &dept.Category,
		&dept.ParentID, &dept.CreatedAt, &dept.UpdatedAt,
	); err != nil {
		return nil, err
	}
	return &dept, nil
}
// ============================================================================
// CRAWL STATUS
// ============================================================================
// UpdateCrawlStatus stores status in university_crawl_status. A row is
// inserted for status.UniversityID if none exists; otherwise every crawl
// column of the existing row is overwritten and updated_at is refreshed.
//
// status itself is not modified; only the error from Exec is returned.
func (r *Repository) UpdateCrawlStatus(ctx context.Context, status *UniversityCrawlStatus) error {
	const upsert = `
INSERT INTO university_crawl_status (
university_id, last_staff_crawl, staff_crawl_status, staff_count, staff_errors,
last_pub_crawl, pub_crawl_status, pub_count, pub_errors,
next_scheduled_crawl, crawl_priority
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
ON CONFLICT (university_id) DO UPDATE SET
last_staff_crawl = EXCLUDED.last_staff_crawl,
staff_crawl_status = EXCLUDED.staff_crawl_status,
staff_count = EXCLUDED.staff_count,
staff_errors = EXCLUDED.staff_errors,
last_pub_crawl = EXCLUDED.last_pub_crawl,
pub_crawl_status = EXCLUDED.pub_crawl_status,
pub_count = EXCLUDED.pub_count,
pub_errors = EXCLUDED.pub_errors,
next_scheduled_crawl = EXCLUDED.next_scheduled_crawl,
crawl_priority = EXCLUDED.crawl_priority,
updated_at = NOW()
`
	// Arguments in placeholder order $1..$11.
	args := []any{
		status.UniversityID, status.LastStaffCrawl, status.StaffCrawlStatus, status.StaffCount, status.StaffErrors,
		status.LastPubCrawl, status.PubCrawlStatus, status.PubCount, status.PubErrors,
		status.NextScheduledCrawl, status.CrawlPriority,
	}
	if _, err := r.db.Pool.Exec(ctx, upsert, args...); err != nil {
		return err
	}
	return nil
}
// GetCrawlStatus retrieves the crawl status row for the university uniID.
//
// It returns (nil, nil) when no status row exists for that university, so
// callers must check the returned pointer as well as the error. Any other
// query or scan failure is returned unchanged.
func (r *Repository) GetCrawlStatus(ctx context.Context, uniID uuid.UUID) (*UniversityCrawlStatus, error) {
	// NOTE(review): SELECT * is scanned positionally, so this relies on the
	// table's column order and count matching the Scan arguments below. Naming
	// the columns explicitly would be safer once the schema is confirmed.
	query := `SELECT * FROM university_crawl_status WHERE university_id = $1`
	s := &UniversityCrawlStatus{}
	err := r.db.Pool.QueryRow(ctx, query, uniID).Scan(
		&s.UniversityID, &s.LastStaffCrawl, &s.StaffCrawlStatus, &s.StaffCount, &s.StaffErrors,
		&s.LastPubCrawl, &s.PubCrawlStatus, &s.PubCount, &s.PubErrors,
		&s.NextScheduledCrawl, &s.CrawlPriority, &s.CreatedAt, &s.UpdatedAt,
	)
	// errors.Is rather than == so a wrapped ErrNoRows is still recognised as
	// "not found" instead of surfacing as a hard error.
	if errors.Is(err, pgx.ErrNoRows) {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	return s, nil
}
// ============================================================================
// STATS
// ============================================================================
// GetStaffStats retrieves statistics about staff data.
//
// It fills the four totals (active staff, active professors, publications,
// universities) and three breakdowns of active staff: by university state, by
// university type and by position type. NULL grouping values are reported
// under the key "unknown".
//
// The first failing query, scan or row iteration aborts the call and returns
// (nil, err); partially filled statistics are never returned.
func (r *Repository) GetStaffStats(ctx context.Context) (*StaffStats, error) {
	stats := &StaffStats{
		ByState:        make(map[string]int),
		ByUniType:      make(map[string]int),
		ByPositionType: make(map[string]int),
	}

	// Basic counts: each query yields a single integer.
	totals := []struct {
		query string
		dest  *int
	}{
		{"SELECT COUNT(*) FROM university_staff WHERE is_active = true", &stats.TotalStaff},
		{"SELECT COUNT(*) FROM university_staff WHERE is_professor = true AND is_active = true", &stats.TotalProfessors},
		{"SELECT COUNT(*) FROM publications", &stats.TotalPublications},
		{"SELECT COUNT(*) FROM universities", &stats.TotalUniversities},
	}
	for _, q := range totals {
		if err := r.db.Pool.QueryRow(ctx, q.query).Scan(q.dest); err != nil {
			return nil, err
		}
	}

	// Grouped counts: each query yields (label, count) rows.
	breakdowns := []struct {
		query string
		dest  map[string]int
	}{
		// By state
		{`
SELECT COALESCE(u.state, 'unknown'), COUNT(*)
FROM university_staff s
JOIN universities u ON s.university_id = u.id
WHERE s.is_active = true
GROUP BY u.state
`, stats.ByState},
		// By uni type
		{`
SELECT COALESCE(u.uni_type, 'unknown'), COUNT(*)
FROM university_staff s
JOIN universities u ON s.university_id = u.id
WHERE s.is_active = true
GROUP BY u.uni_type
`, stats.ByUniType},
		// By position type
		{`
SELECT COALESCE(position_type, 'unknown'), COUNT(*)
FROM university_staff
WHERE is_active = true
GROUP BY position_type
`, stats.ByPositionType},
	}
	for _, b := range breakdowns {
		if err := r.scanGroupedCounts(ctx, b.query, b.dest); err != nil {
			return nil, err
		}
	}
	return stats, nil
}

// scanGroupedCounts runs a query that yields (label, count) rows and stores
// each pair in dest. It reports iteration errors via rows.Err and closes the
// result set before returning, so the caller can issue its next query.
func (r *Repository) scanGroupedCounts(ctx context.Context, query string, dest map[string]int) error {
	rows, err := r.db.Pool.Query(ctx, query)
	if err != nil {
		return err
	}
	defer rows.Close()

	for rows.Next() {
		var label string
		var count int
		if err := rows.Scan(&label, &count); err != nil {
			return err
		}
		dest[label] = count
	}
	// Next returning false can mean "done" or "failed"; Err tells them apart.
	return rows.Err()
}