Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 42s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m51s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 29s
sed replacement left orphaned hostname references in story page and empty lines in getApiBase functions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
294 lines
8.9 KiB
Go
294 lines
8.9 KiB
Go
package database
|
|
|
|
import (
	"context"
	"errors"

	"github.com/google/uuid"
	"github.com/jackc/pgx/v5"
)
|
|
|
|
// Repository provides database operations for staff and publications
|
|
type Repository struct {
|
|
db *DB
|
|
}
|
|
|
|
// NewRepository creates a new repository
|
|
func NewRepository(db *DB) *Repository {
|
|
return &Repository{db: db}
|
|
}
|
|
|
|
// ============================================================================
|
|
// UNIVERSITIES
|
|
// ============================================================================
|
|
|
|
// CreateUniversity creates a new university
|
|
func (r *Repository) CreateUniversity(ctx context.Context, u *University) error {
|
|
query := `
|
|
INSERT INTO universities (name, short_name, url, state, uni_type, staff_page_pattern)
|
|
VALUES ($1, $2, $3, $4, $5, $6)
|
|
ON CONFLICT (url) DO UPDATE SET
|
|
name = EXCLUDED.name,
|
|
short_name = EXCLUDED.short_name,
|
|
state = EXCLUDED.state,
|
|
uni_type = EXCLUDED.uni_type,
|
|
staff_page_pattern = EXCLUDED.staff_page_pattern,
|
|
updated_at = NOW()
|
|
RETURNING id, created_at, updated_at
|
|
`
|
|
return r.db.Pool.QueryRow(ctx, query,
|
|
u.Name, u.ShortName, u.URL, u.State, u.UniType, u.StaffPagePattern,
|
|
).Scan(&u.ID, &u.CreatedAt, &u.UpdatedAt)
|
|
}
|
|
|
|
// GetUniversity retrieves a university by ID
|
|
func (r *Repository) GetUniversity(ctx context.Context, id uuid.UUID) (*University, error) {
|
|
query := `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
|
|
FROM universities WHERE id = $1`
|
|
|
|
u := &University{}
|
|
err := r.db.Pool.QueryRow(ctx, query, id).Scan(
|
|
&u.ID, &u.Name, &u.ShortName, &u.URL, &u.State, &u.UniType,
|
|
&u.StaffPagePattern, &u.CreatedAt, &u.UpdatedAt,
|
|
)
|
|
if err == pgx.ErrNoRows {
|
|
return nil, nil
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return u, nil
|
|
}
|
|
|
|
// GetUniversityByID is an alias for GetUniversity (for interface compatibility)
|
|
func (r *Repository) GetUniversityByID(ctx context.Context, id uuid.UUID) (*University, error) {
|
|
return r.GetUniversity(ctx, id)
|
|
}
|
|
|
|
// GetUniversityByURL retrieves a university by URL
|
|
func (r *Repository) GetUniversityByURL(ctx context.Context, url string) (*University, error) {
|
|
query := `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
|
|
FROM universities WHERE url = $1`
|
|
|
|
u := &University{}
|
|
err := r.db.Pool.QueryRow(ctx, query, url).Scan(
|
|
&u.ID, &u.Name, &u.ShortName, &u.URL, &u.State, &u.UniType,
|
|
&u.StaffPagePattern, &u.CreatedAt, &u.UpdatedAt,
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return u, nil
|
|
}
|
|
|
|
// ListUniversities lists all universities
|
|
func (r *Repository) ListUniversities(ctx context.Context) ([]University, error) {
|
|
query := `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
|
|
FROM universities ORDER BY name`
|
|
|
|
rows, err := r.db.Pool.Query(ctx, query)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var universities []University
|
|
for rows.Next() {
|
|
var u University
|
|
if err := rows.Scan(
|
|
&u.ID, &u.Name, &u.ShortName, &u.URL, &u.State, &u.UniType,
|
|
&u.StaffPagePattern, &u.CreatedAt, &u.UpdatedAt,
|
|
); err != nil {
|
|
return nil, err
|
|
}
|
|
universities = append(universities, u)
|
|
}
|
|
return universities, rows.Err()
|
|
}
|
|
|
|
// ============================================================================
|
|
// DEPARTMENTS
|
|
// ============================================================================
|
|
|
|
// CreateDepartment creates or updates a department
|
|
func (r *Repository) CreateDepartment(ctx context.Context, d *Department) error {
|
|
query := `
|
|
INSERT INTO departments (university_id, name, name_en, url, category, parent_id)
|
|
VALUES ($1, $2, $3, $4, $5, $6)
|
|
ON CONFLICT (university_id, name) DO UPDATE SET
|
|
name_en = EXCLUDED.name_en,
|
|
url = EXCLUDED.url,
|
|
category = EXCLUDED.category,
|
|
parent_id = EXCLUDED.parent_id,
|
|
updated_at = NOW()
|
|
RETURNING id, created_at, updated_at
|
|
`
|
|
return r.db.Pool.QueryRow(ctx, query,
|
|
d.UniversityID, d.Name, d.NameEN, d.URL, d.Category, d.ParentID,
|
|
).Scan(&d.ID, &d.CreatedAt, &d.UpdatedAt)
|
|
}
|
|
|
|
// GetDepartmentByName retrieves a department by university and name
|
|
func (r *Repository) GetDepartmentByName(ctx context.Context, uniID uuid.UUID, name string) (*Department, error) {
|
|
query := `SELECT id, university_id, name, name_en, url, category, parent_id, created_at, updated_at
|
|
FROM departments WHERE university_id = $1 AND name = $2`
|
|
|
|
d := &Department{}
|
|
err := r.db.Pool.QueryRow(ctx, query, uniID, name).Scan(
|
|
&d.ID, &d.UniversityID, &d.Name, &d.NameEN, &d.URL, &d.Category,
|
|
&d.ParentID, &d.CreatedAt, &d.UpdatedAt,
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return d, nil
|
|
}
|
|
|
|
// ============================================================================
|
|
// CRAWL STATUS
|
|
// ============================================================================
|
|
|
|
// UpdateCrawlStatus updates crawl status for a university
|
|
func (r *Repository) UpdateCrawlStatus(ctx context.Context, status *UniversityCrawlStatus) error {
|
|
query := `
|
|
INSERT INTO university_crawl_status (
|
|
university_id, last_staff_crawl, staff_crawl_status, staff_count, staff_errors,
|
|
last_pub_crawl, pub_crawl_status, pub_count, pub_errors,
|
|
next_scheduled_crawl, crawl_priority
|
|
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
|
ON CONFLICT (university_id) DO UPDATE SET
|
|
last_staff_crawl = EXCLUDED.last_staff_crawl,
|
|
staff_crawl_status = EXCLUDED.staff_crawl_status,
|
|
staff_count = EXCLUDED.staff_count,
|
|
staff_errors = EXCLUDED.staff_errors,
|
|
last_pub_crawl = EXCLUDED.last_pub_crawl,
|
|
pub_crawl_status = EXCLUDED.pub_crawl_status,
|
|
pub_count = EXCLUDED.pub_count,
|
|
pub_errors = EXCLUDED.pub_errors,
|
|
next_scheduled_crawl = EXCLUDED.next_scheduled_crawl,
|
|
crawl_priority = EXCLUDED.crawl_priority,
|
|
updated_at = NOW()
|
|
`
|
|
_, err := r.db.Pool.Exec(ctx, query,
|
|
status.UniversityID, status.LastStaffCrawl, status.StaffCrawlStatus, status.StaffCount, status.StaffErrors,
|
|
status.LastPubCrawl, status.PubCrawlStatus, status.PubCount, status.PubErrors,
|
|
status.NextScheduledCrawl, status.CrawlPriority,
|
|
)
|
|
return err
|
|
}
|
|
|
|
// GetCrawlStatus retrieves crawl status for a university
|
|
func (r *Repository) GetCrawlStatus(ctx context.Context, uniID uuid.UUID) (*UniversityCrawlStatus, error) {
|
|
query := `SELECT * FROM university_crawl_status WHERE university_id = $1`
|
|
|
|
s := &UniversityCrawlStatus{}
|
|
err := r.db.Pool.QueryRow(ctx, query, uniID).Scan(
|
|
&s.UniversityID, &s.LastStaffCrawl, &s.StaffCrawlStatus, &s.StaffCount, &s.StaffErrors,
|
|
&s.LastPubCrawl, &s.PubCrawlStatus, &s.PubCount, &s.PubErrors,
|
|
&s.NextScheduledCrawl, &s.CrawlPriority, &s.CreatedAt, &s.UpdatedAt,
|
|
)
|
|
if err == pgx.ErrNoRows {
|
|
return nil, nil
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return s, nil
|
|
}
|
|
|
|
// ============================================================================
|
|
// STATS
|
|
// ============================================================================
|
|
|
|
// GetStaffStats retrieves statistics about staff data
|
|
func (r *Repository) GetStaffStats(ctx context.Context) (*StaffStats, error) {
|
|
stats := &StaffStats{
|
|
ByState: make(map[string]int),
|
|
ByUniType: make(map[string]int),
|
|
ByPositionType: make(map[string]int),
|
|
}
|
|
|
|
// Basic counts
|
|
queries := []struct {
|
|
query string
|
|
dest *int
|
|
}{
|
|
{"SELECT COUNT(*) FROM university_staff WHERE is_active = true", &stats.TotalStaff},
|
|
{"SELECT COUNT(*) FROM university_staff WHERE is_professor = true AND is_active = true", &stats.TotalProfessors},
|
|
{"SELECT COUNT(*) FROM publications", &stats.TotalPublications},
|
|
{"SELECT COUNT(*) FROM universities", &stats.TotalUniversities},
|
|
}
|
|
|
|
for _, q := range queries {
|
|
if err := r.db.Pool.QueryRow(ctx, q.query).Scan(q.dest); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// By state
|
|
rows, err := r.db.Pool.Query(ctx, `
|
|
SELECT COALESCE(u.state, 'unknown'), COUNT(*)
|
|
FROM university_staff s
|
|
JOIN universities u ON s.university_id = u.id
|
|
WHERE s.is_active = true
|
|
GROUP BY u.state
|
|
`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
for rows.Next() {
|
|
var state string
|
|
var count int
|
|
if err := rows.Scan(&state, &count); err != nil {
|
|
return nil, err
|
|
}
|
|
stats.ByState[state] = count
|
|
}
|
|
|
|
// By uni type
|
|
rows2, err := r.db.Pool.Query(ctx, `
|
|
SELECT COALESCE(u.uni_type, 'unknown'), COUNT(*)
|
|
FROM university_staff s
|
|
JOIN universities u ON s.university_id = u.id
|
|
WHERE s.is_active = true
|
|
GROUP BY u.uni_type
|
|
`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows2.Close()
|
|
|
|
for rows2.Next() {
|
|
var uniType string
|
|
var count int
|
|
if err := rows2.Scan(&uniType, &count); err != nil {
|
|
return nil, err
|
|
}
|
|
stats.ByUniType[uniType] = count
|
|
}
|
|
|
|
// By position type
|
|
rows3, err := r.db.Pool.Query(ctx, `
|
|
SELECT COALESCE(position_type, 'unknown'), COUNT(*)
|
|
FROM university_staff
|
|
WHERE is_active = true
|
|
GROUP BY position_type
|
|
`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows3.Close()
|
|
|
|
for rows3.Next() {
|
|
var posType string
|
|
var count int
|
|
if err := rows3.Scan(&posType, &count); err != nil {
|
|
return nil, err
|
|
}
|
|
stats.ByPositionType[posType] = count
|
|
}
|
|
|
|
return stats, nil
|
|
}
|