All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefuegt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
685 lines
22 KiB
Go
685 lines
22 KiB
Go
package database
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/jackc/pgx/v5"
|
|
)
|
|
|
|
// Repository provides database operations for staff and publications
|
|
type Repository struct {
|
|
db *DB
|
|
}
|
|
|
|
// NewRepository creates a new repository
|
|
func NewRepository(db *DB) *Repository {
|
|
return &Repository{db: db}
|
|
}
|
|
|
|
// ============================================================================
|
|
// UNIVERSITIES
|
|
// ============================================================================
|
|
|
|
// CreateUniversity creates a new university
|
|
func (r *Repository) CreateUniversity(ctx context.Context, u *University) error {
|
|
query := `
|
|
INSERT INTO universities (name, short_name, url, state, uni_type, staff_page_pattern)
|
|
VALUES ($1, $2, $3, $4, $5, $6)
|
|
ON CONFLICT (url) DO UPDATE SET
|
|
name = EXCLUDED.name,
|
|
short_name = EXCLUDED.short_name,
|
|
state = EXCLUDED.state,
|
|
uni_type = EXCLUDED.uni_type,
|
|
staff_page_pattern = EXCLUDED.staff_page_pattern,
|
|
updated_at = NOW()
|
|
RETURNING id, created_at, updated_at
|
|
`
|
|
return r.db.Pool.QueryRow(ctx, query,
|
|
u.Name, u.ShortName, u.URL, u.State, u.UniType, u.StaffPagePattern,
|
|
).Scan(&u.ID, &u.CreatedAt, &u.UpdatedAt)
|
|
}
|
|
|
|
// GetUniversity retrieves a university by ID
|
|
func (r *Repository) GetUniversity(ctx context.Context, id uuid.UUID) (*University, error) {
|
|
query := `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
|
|
FROM universities WHERE id = $1`
|
|
|
|
u := &University{}
|
|
err := r.db.Pool.QueryRow(ctx, query, id).Scan(
|
|
&u.ID, &u.Name, &u.ShortName, &u.URL, &u.State, &u.UniType,
|
|
&u.StaffPagePattern, &u.CreatedAt, &u.UpdatedAt,
|
|
)
|
|
if err == pgx.ErrNoRows {
|
|
return nil, nil
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return u, nil
|
|
}
|
|
|
|
// GetUniversityByID is an alias for GetUniversity (for interface compatibility)
|
|
func (r *Repository) GetUniversityByID(ctx context.Context, id uuid.UUID) (*University, error) {
|
|
return r.GetUniversity(ctx, id)
|
|
}
|
|
|
|
// GetUniversityByURL retrieves a university by URL
|
|
func (r *Repository) GetUniversityByURL(ctx context.Context, url string) (*University, error) {
|
|
query := `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
|
|
FROM universities WHERE url = $1`
|
|
|
|
u := &University{}
|
|
err := r.db.Pool.QueryRow(ctx, query, url).Scan(
|
|
&u.ID, &u.Name, &u.ShortName, &u.URL, &u.State, &u.UniType,
|
|
&u.StaffPagePattern, &u.CreatedAt, &u.UpdatedAt,
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return u, nil
|
|
}
|
|
|
|
// ListUniversities lists all universities
|
|
func (r *Repository) ListUniversities(ctx context.Context) ([]University, error) {
|
|
query := `SELECT id, name, short_name, url, state, uni_type, staff_page_pattern, created_at, updated_at
|
|
FROM universities ORDER BY name`
|
|
|
|
rows, err := r.db.Pool.Query(ctx, query)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var universities []University
|
|
for rows.Next() {
|
|
var u University
|
|
if err := rows.Scan(
|
|
&u.ID, &u.Name, &u.ShortName, &u.URL, &u.State, &u.UniType,
|
|
&u.StaffPagePattern, &u.CreatedAt, &u.UpdatedAt,
|
|
); err != nil {
|
|
return nil, err
|
|
}
|
|
universities = append(universities, u)
|
|
}
|
|
return universities, rows.Err()
|
|
}
|
|
|
|
// ============================================================================
|
|
// DEPARTMENTS
|
|
// ============================================================================
|
|
|
|
// CreateDepartment creates or updates a department
|
|
func (r *Repository) CreateDepartment(ctx context.Context, d *Department) error {
|
|
query := `
|
|
INSERT INTO departments (university_id, name, name_en, url, category, parent_id)
|
|
VALUES ($1, $2, $3, $4, $5, $6)
|
|
ON CONFLICT (university_id, name) DO UPDATE SET
|
|
name_en = EXCLUDED.name_en,
|
|
url = EXCLUDED.url,
|
|
category = EXCLUDED.category,
|
|
parent_id = EXCLUDED.parent_id,
|
|
updated_at = NOW()
|
|
RETURNING id, created_at, updated_at
|
|
`
|
|
return r.db.Pool.QueryRow(ctx, query,
|
|
d.UniversityID, d.Name, d.NameEN, d.URL, d.Category, d.ParentID,
|
|
).Scan(&d.ID, &d.CreatedAt, &d.UpdatedAt)
|
|
}
|
|
|
|
// GetDepartmentByName retrieves a department by university and name
|
|
func (r *Repository) GetDepartmentByName(ctx context.Context, uniID uuid.UUID, name string) (*Department, error) {
|
|
query := `SELECT id, university_id, name, name_en, url, category, parent_id, created_at, updated_at
|
|
FROM departments WHERE university_id = $1 AND name = $2`
|
|
|
|
d := &Department{}
|
|
err := r.db.Pool.QueryRow(ctx, query, uniID, name).Scan(
|
|
&d.ID, &d.UniversityID, &d.Name, &d.NameEN, &d.URL, &d.Category,
|
|
&d.ParentID, &d.CreatedAt, &d.UpdatedAt,
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return d, nil
|
|
}
|
|
|
|
// ============================================================================
|
|
// STAFF
|
|
// ============================================================================
|
|
|
|
// CreateStaff creates or updates a staff member
|
|
func (r *Repository) CreateStaff(ctx context.Context, s *UniversityStaff) error {
|
|
query := `
|
|
INSERT INTO university_staff (
|
|
university_id, department_id, first_name, last_name, full_name,
|
|
title, academic_title, position, position_type, is_professor,
|
|
email, phone, office, profile_url, photo_url,
|
|
orcid, google_scholar_id, researchgate_url, linkedin_url, personal_website,
|
|
research_interests, research_summary, supervisor_id, team_role, source_url
|
|
) VALUES (
|
|
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
|
|
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
|
|
$21, $22, $23, $24, $25
|
|
)
|
|
ON CONFLICT (university_id, first_name, last_name, COALESCE(department_id, '00000000-0000-0000-0000-000000000000'::uuid))
|
|
DO UPDATE SET
|
|
full_name = EXCLUDED.full_name,
|
|
title = EXCLUDED.title,
|
|
academic_title = EXCLUDED.academic_title,
|
|
position = EXCLUDED.position,
|
|
position_type = EXCLUDED.position_type,
|
|
is_professor = EXCLUDED.is_professor,
|
|
email = COALESCE(EXCLUDED.email, university_staff.email),
|
|
phone = COALESCE(EXCLUDED.phone, university_staff.phone),
|
|
office = COALESCE(EXCLUDED.office, university_staff.office),
|
|
profile_url = COALESCE(EXCLUDED.profile_url, university_staff.profile_url),
|
|
photo_url = COALESCE(EXCLUDED.photo_url, university_staff.photo_url),
|
|
orcid = COALESCE(EXCLUDED.orcid, university_staff.orcid),
|
|
google_scholar_id = COALESCE(EXCLUDED.google_scholar_id, university_staff.google_scholar_id),
|
|
researchgate_url = COALESCE(EXCLUDED.researchgate_url, university_staff.researchgate_url),
|
|
linkedin_url = COALESCE(EXCLUDED.linkedin_url, university_staff.linkedin_url),
|
|
personal_website = COALESCE(EXCLUDED.personal_website, university_staff.personal_website),
|
|
research_interests = COALESCE(EXCLUDED.research_interests, university_staff.research_interests),
|
|
research_summary = COALESCE(EXCLUDED.research_summary, university_staff.research_summary),
|
|
supervisor_id = COALESCE(EXCLUDED.supervisor_id, university_staff.supervisor_id),
|
|
team_role = COALESCE(EXCLUDED.team_role, university_staff.team_role),
|
|
source_url = COALESCE(EXCLUDED.source_url, university_staff.source_url),
|
|
crawled_at = NOW(),
|
|
updated_at = NOW()
|
|
RETURNING id, crawled_at, created_at, updated_at
|
|
`
|
|
return r.db.Pool.QueryRow(ctx, query,
|
|
s.UniversityID, s.DepartmentID, s.FirstName, s.LastName, s.FullName,
|
|
s.Title, s.AcademicTitle, s.Position, s.PositionType, s.IsProfessor,
|
|
s.Email, s.Phone, s.Office, s.ProfileURL, s.PhotoURL,
|
|
s.ORCID, s.GoogleScholarID, s.ResearchgateURL, s.LinkedInURL, s.PersonalWebsite,
|
|
s.ResearchInterests, s.ResearchSummary, s.SupervisorID, s.TeamRole, s.SourceURL,
|
|
).Scan(&s.ID, &s.CrawledAt, &s.CreatedAt, &s.UpdatedAt)
|
|
}
|
|
|
|
// GetStaff retrieves a staff member by ID
|
|
func (r *Repository) GetStaff(ctx context.Context, id uuid.UUID) (*UniversityStaff, error) {
|
|
query := `SELECT * FROM v_staff_full WHERE id = $1`
|
|
|
|
s := &UniversityStaff{}
|
|
err := r.db.Pool.QueryRow(ctx, query, id).Scan(
|
|
&s.ID, &s.UniversityID, &s.DepartmentID, &s.FirstName, &s.LastName, &s.FullName,
|
|
&s.Title, &s.AcademicTitle, &s.Position, &s.PositionType, &s.IsProfessor,
|
|
&s.Email, &s.Phone, &s.Office, &s.ProfileURL, &s.PhotoURL,
|
|
&s.ORCID, &s.GoogleScholarID, &s.ResearchgateURL, &s.LinkedInURL, &s.PersonalWebsite,
|
|
&s.ResearchInterests, &s.ResearchSummary, &s.CrawledAt, &s.LastVerified, &s.IsActive, &s.SourceURL,
|
|
&s.CreatedAt, &s.UpdatedAt, &s.UniversityName, &s.UniversityShort, nil, nil,
|
|
&s.DepartmentName, nil, &s.PublicationCount,
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return s, nil
|
|
}
|
|
|
|
// SearchStaff searches for staff members
|
|
func (r *Repository) SearchStaff(ctx context.Context, params StaffSearchParams) (*StaffSearchResult, error) {
|
|
// Build query dynamically
|
|
var conditions []string
|
|
var args []interface{}
|
|
argNum := 1
|
|
|
|
baseQuery := `
|
|
SELECT s.id, s.university_id, s.department_id, s.first_name, s.last_name, s.full_name,
|
|
s.title, s.academic_title, s.position, s.position_type, s.is_professor,
|
|
s.email, s.profile_url, s.photo_url, s.orcid,
|
|
s.research_interests, s.crawled_at, s.is_active,
|
|
u.name as university_name, u.short_name as university_short, u.state as university_state,
|
|
d.name as department_name,
|
|
(SELECT COUNT(*) FROM staff_publications sp WHERE sp.staff_id = s.id) as publication_count
|
|
FROM university_staff s
|
|
JOIN universities u ON s.university_id = u.id
|
|
LEFT JOIN departments d ON s.department_id = d.id
|
|
`
|
|
|
|
if params.Query != "" {
|
|
conditions = append(conditions, fmt.Sprintf(
|
|
`(to_tsvector('german', COALESCE(s.full_name, '') || ' ' || COALESCE(s.research_summary, '')) @@ plainto_tsquery('german', $%d)
|
|
OR s.full_name ILIKE '%%' || $%d || '%%'
|
|
OR s.last_name ILIKE '%%' || $%d || '%%')`,
|
|
argNum, argNum, argNum))
|
|
args = append(args, params.Query)
|
|
argNum++
|
|
}
|
|
|
|
if params.UniversityID != nil {
|
|
conditions = append(conditions, fmt.Sprintf("s.university_id = $%d", argNum))
|
|
args = append(args, *params.UniversityID)
|
|
argNum++
|
|
}
|
|
|
|
if params.DepartmentID != nil {
|
|
conditions = append(conditions, fmt.Sprintf("s.department_id = $%d", argNum))
|
|
args = append(args, *params.DepartmentID)
|
|
argNum++
|
|
}
|
|
|
|
if params.State != nil {
|
|
conditions = append(conditions, fmt.Sprintf("u.state = $%d", argNum))
|
|
args = append(args, *params.State)
|
|
argNum++
|
|
}
|
|
|
|
if params.UniType != nil {
|
|
conditions = append(conditions, fmt.Sprintf("u.uni_type = $%d", argNum))
|
|
args = append(args, *params.UniType)
|
|
argNum++
|
|
}
|
|
|
|
if params.PositionType != nil {
|
|
conditions = append(conditions, fmt.Sprintf("s.position_type = $%d", argNum))
|
|
args = append(args, *params.PositionType)
|
|
argNum++
|
|
}
|
|
|
|
if params.IsProfessor != nil {
|
|
conditions = append(conditions, fmt.Sprintf("s.is_professor = $%d", argNum))
|
|
args = append(args, *params.IsProfessor)
|
|
argNum++
|
|
}
|
|
|
|
// Build WHERE clause
|
|
whereClause := ""
|
|
if len(conditions) > 0 {
|
|
whereClause = "WHERE " + strings.Join(conditions, " AND ")
|
|
}
|
|
|
|
// Count total
|
|
countQuery := fmt.Sprintf("SELECT COUNT(*) FROM university_staff s JOIN universities u ON s.university_id = u.id LEFT JOIN departments d ON s.department_id = d.id %s", whereClause)
|
|
var total int
|
|
if err := r.db.Pool.QueryRow(ctx, countQuery, args...).Scan(&total); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Apply pagination
|
|
limit := params.Limit
|
|
if limit <= 0 {
|
|
limit = 20
|
|
}
|
|
if limit > 100 {
|
|
limit = 100
|
|
}
|
|
|
|
offset := params.Offset
|
|
if offset < 0 {
|
|
offset = 0
|
|
}
|
|
|
|
// Full query with pagination
|
|
fullQuery := fmt.Sprintf("%s %s ORDER BY s.is_professor DESC, s.last_name ASC LIMIT %d OFFSET %d",
|
|
baseQuery, whereClause, limit, offset)
|
|
|
|
rows, err := r.db.Pool.Query(ctx, fullQuery, args...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var staff []UniversityStaff
|
|
for rows.Next() {
|
|
var s UniversityStaff
|
|
var uniState *string
|
|
if err := rows.Scan(
|
|
&s.ID, &s.UniversityID, &s.DepartmentID, &s.FirstName, &s.LastName, &s.FullName,
|
|
&s.Title, &s.AcademicTitle, &s.Position, &s.PositionType, &s.IsProfessor,
|
|
&s.Email, &s.ProfileURL, &s.PhotoURL, &s.ORCID,
|
|
&s.ResearchInterests, &s.CrawledAt, &s.IsActive,
|
|
&s.UniversityName, &s.UniversityShort, &uniState,
|
|
&s.DepartmentName, &s.PublicationCount,
|
|
); err != nil {
|
|
return nil, err
|
|
}
|
|
staff = append(staff, s)
|
|
}
|
|
|
|
return &StaffSearchResult{
|
|
Staff: staff,
|
|
Total: total,
|
|
Limit: limit,
|
|
Offset: offset,
|
|
Query: params.Query,
|
|
}, rows.Err()
|
|
}
|
|
|
|
// ============================================================================
|
|
// PUBLICATIONS
|
|
// ============================================================================
|
|
|
|
// CreatePublication creates or updates a publication
|
|
func (r *Repository) CreatePublication(ctx context.Context, p *Publication) error {
|
|
query := `
|
|
INSERT INTO publications (
|
|
title, title_en, abstract, abstract_en, year, month,
|
|
pub_type, venue, venue_short, publisher,
|
|
doi, isbn, issn, arxiv_id, pubmed_id,
|
|
url, pdf_url, citation_count, keywords, topics, source, raw_data
|
|
) VALUES (
|
|
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
|
|
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22
|
|
)
|
|
ON CONFLICT (doi) WHERE doi IS NOT NULL DO UPDATE SET
|
|
title = EXCLUDED.title,
|
|
abstract = EXCLUDED.abstract,
|
|
year = EXCLUDED.year,
|
|
venue = EXCLUDED.venue,
|
|
citation_count = EXCLUDED.citation_count,
|
|
updated_at = NOW()
|
|
RETURNING id, crawled_at, created_at, updated_at
|
|
`
|
|
|
|
// Handle potential duplicate without DOI
|
|
err := r.db.Pool.QueryRow(ctx, query,
|
|
p.Title, p.TitleEN, p.Abstract, p.AbstractEN, p.Year, p.Month,
|
|
p.PubType, p.Venue, p.VenueShort, p.Publisher,
|
|
p.DOI, p.ISBN, p.ISSN, p.ArxivID, p.PubmedID,
|
|
p.URL, p.PDFURL, p.CitationCount, p.Keywords, p.Topics, p.Source, p.RawData,
|
|
).Scan(&p.ID, &p.CrawledAt, &p.CreatedAt, &p.UpdatedAt)
|
|
|
|
if err != nil && strings.Contains(err.Error(), "duplicate") {
|
|
// Try to find existing publication by title and year
|
|
findQuery := `SELECT id FROM publications WHERE title = $1 AND year = $2`
|
|
err = r.db.Pool.QueryRow(ctx, findQuery, p.Title, p.Year).Scan(&p.ID)
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
// LinkStaffPublication creates a link between staff and publication
|
|
func (r *Repository) LinkStaffPublication(ctx context.Context, sp *StaffPublication) error {
|
|
query := `
|
|
INSERT INTO staff_publications (staff_id, publication_id, author_position, is_corresponding)
|
|
VALUES ($1, $2, $3, $4)
|
|
ON CONFLICT (staff_id, publication_id) DO UPDATE SET
|
|
author_position = EXCLUDED.author_position,
|
|
is_corresponding = EXCLUDED.is_corresponding
|
|
`
|
|
_, err := r.db.Pool.Exec(ctx, query,
|
|
sp.StaffID, sp.PublicationID, sp.AuthorPosition, sp.IsCorresponding,
|
|
)
|
|
return err
|
|
}
|
|
|
|
// GetStaffPublications retrieves all publications for a staff member
|
|
func (r *Repository) GetStaffPublications(ctx context.Context, staffID uuid.UUID) ([]Publication, error) {
|
|
query := `
|
|
SELECT p.id, p.title, p.abstract, p.year, p.pub_type, p.venue, p.doi, p.url, p.citation_count
|
|
FROM publications p
|
|
JOIN staff_publications sp ON p.id = sp.publication_id
|
|
WHERE sp.staff_id = $1
|
|
ORDER BY p.year DESC NULLS LAST, p.title
|
|
`
|
|
|
|
rows, err := r.db.Pool.Query(ctx, query, staffID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var pubs []Publication
|
|
for rows.Next() {
|
|
var p Publication
|
|
if err := rows.Scan(
|
|
&p.ID, &p.Title, &p.Abstract, &p.Year, &p.PubType, &p.Venue, &p.DOI, &p.URL, &p.CitationCount,
|
|
); err != nil {
|
|
return nil, err
|
|
}
|
|
pubs = append(pubs, p)
|
|
}
|
|
return pubs, rows.Err()
|
|
}
|
|
|
|
// SearchPublications searches for publications
|
|
func (r *Repository) SearchPublications(ctx context.Context, params PublicationSearchParams) (*PublicationSearchResult, error) {
|
|
var conditions []string
|
|
var args []interface{}
|
|
argNum := 1
|
|
|
|
if params.Query != "" {
|
|
conditions = append(conditions, fmt.Sprintf(
|
|
`to_tsvector('german', COALESCE(title, '') || ' ' || COALESCE(abstract, '')) @@ plainto_tsquery('german', $%d)`,
|
|
argNum))
|
|
args = append(args, params.Query)
|
|
argNum++
|
|
}
|
|
|
|
if params.StaffID != nil {
|
|
conditions = append(conditions, fmt.Sprintf(
|
|
`id IN (SELECT publication_id FROM staff_publications WHERE staff_id = $%d)`,
|
|
argNum))
|
|
args = append(args, *params.StaffID)
|
|
argNum++
|
|
}
|
|
|
|
if params.Year != nil {
|
|
conditions = append(conditions, fmt.Sprintf("year = $%d", argNum))
|
|
args = append(args, *params.Year)
|
|
argNum++
|
|
}
|
|
|
|
if params.YearFrom != nil {
|
|
conditions = append(conditions, fmt.Sprintf("year >= $%d", argNum))
|
|
args = append(args, *params.YearFrom)
|
|
argNum++
|
|
}
|
|
|
|
if params.YearTo != nil {
|
|
conditions = append(conditions, fmt.Sprintf("year <= $%d", argNum))
|
|
args = append(args, *params.YearTo)
|
|
argNum++
|
|
}
|
|
|
|
if params.PubType != nil {
|
|
conditions = append(conditions, fmt.Sprintf("pub_type = $%d", argNum))
|
|
args = append(args, *params.PubType)
|
|
argNum++
|
|
}
|
|
|
|
whereClause := ""
|
|
if len(conditions) > 0 {
|
|
whereClause = "WHERE " + strings.Join(conditions, " AND ")
|
|
}
|
|
|
|
// Count
|
|
countQuery := fmt.Sprintf("SELECT COUNT(*) FROM publications %s", whereClause)
|
|
var total int
|
|
if err := r.db.Pool.QueryRow(ctx, countQuery, args...).Scan(&total); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Pagination
|
|
limit := params.Limit
|
|
if limit <= 0 {
|
|
limit = 20
|
|
}
|
|
offset := params.Offset
|
|
|
|
// Query
|
|
query := fmt.Sprintf(`
|
|
SELECT id, title, abstract, year, pub_type, venue, doi, url, citation_count, keywords
|
|
FROM publications %s
|
|
ORDER BY year DESC NULLS LAST, citation_count DESC
|
|
LIMIT %d OFFSET %d
|
|
`, whereClause, limit, offset)
|
|
|
|
rows, err := r.db.Pool.Query(ctx, query, args...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var pubs []Publication
|
|
for rows.Next() {
|
|
var p Publication
|
|
if err := rows.Scan(
|
|
&p.ID, &p.Title, &p.Abstract, &p.Year, &p.PubType, &p.Venue, &p.DOI, &p.URL, &p.CitationCount, &p.Keywords,
|
|
); err != nil {
|
|
return nil, err
|
|
}
|
|
pubs = append(pubs, p)
|
|
}
|
|
|
|
return &PublicationSearchResult{
|
|
Publications: pubs,
|
|
Total: total,
|
|
Limit: limit,
|
|
Offset: offset,
|
|
Query: params.Query,
|
|
}, rows.Err()
|
|
}
|
|
|
|
// ============================================================================
|
|
// CRAWL STATUS
|
|
// ============================================================================
|
|
|
|
// UpdateCrawlStatus updates crawl status for a university
|
|
func (r *Repository) UpdateCrawlStatus(ctx context.Context, status *UniversityCrawlStatus) error {
|
|
query := `
|
|
INSERT INTO university_crawl_status (
|
|
university_id, last_staff_crawl, staff_crawl_status, staff_count, staff_errors,
|
|
last_pub_crawl, pub_crawl_status, pub_count, pub_errors,
|
|
next_scheduled_crawl, crawl_priority
|
|
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
|
ON CONFLICT (university_id) DO UPDATE SET
|
|
last_staff_crawl = EXCLUDED.last_staff_crawl,
|
|
staff_crawl_status = EXCLUDED.staff_crawl_status,
|
|
staff_count = EXCLUDED.staff_count,
|
|
staff_errors = EXCLUDED.staff_errors,
|
|
last_pub_crawl = EXCLUDED.last_pub_crawl,
|
|
pub_crawl_status = EXCLUDED.pub_crawl_status,
|
|
pub_count = EXCLUDED.pub_count,
|
|
pub_errors = EXCLUDED.pub_errors,
|
|
next_scheduled_crawl = EXCLUDED.next_scheduled_crawl,
|
|
crawl_priority = EXCLUDED.crawl_priority,
|
|
updated_at = NOW()
|
|
`
|
|
_, err := r.db.Pool.Exec(ctx, query,
|
|
status.UniversityID, status.LastStaffCrawl, status.StaffCrawlStatus, status.StaffCount, status.StaffErrors,
|
|
status.LastPubCrawl, status.PubCrawlStatus, status.PubCount, status.PubErrors,
|
|
status.NextScheduledCrawl, status.CrawlPriority,
|
|
)
|
|
return err
|
|
}
|
|
|
|
// GetCrawlStatus retrieves crawl status for a university
|
|
func (r *Repository) GetCrawlStatus(ctx context.Context, uniID uuid.UUID) (*UniversityCrawlStatus, error) {
|
|
query := `SELECT * FROM university_crawl_status WHERE university_id = $1`
|
|
|
|
s := &UniversityCrawlStatus{}
|
|
err := r.db.Pool.QueryRow(ctx, query, uniID).Scan(
|
|
&s.UniversityID, &s.LastStaffCrawl, &s.StaffCrawlStatus, &s.StaffCount, &s.StaffErrors,
|
|
&s.LastPubCrawl, &s.PubCrawlStatus, &s.PubCount, &s.PubErrors,
|
|
&s.NextScheduledCrawl, &s.CrawlPriority, &s.CreatedAt, &s.UpdatedAt,
|
|
)
|
|
if err == pgx.ErrNoRows {
|
|
return nil, nil
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return s, nil
|
|
}
|
|
|
|
// ============================================================================
|
|
// STATS
|
|
// ============================================================================
|
|
|
|
// GetStaffStats retrieves statistics about staff data
|
|
func (r *Repository) GetStaffStats(ctx context.Context) (*StaffStats, error) {
|
|
stats := &StaffStats{
|
|
ByState: make(map[string]int),
|
|
ByUniType: make(map[string]int),
|
|
ByPositionType: make(map[string]int),
|
|
}
|
|
|
|
// Basic counts
|
|
queries := []struct {
|
|
query string
|
|
dest *int
|
|
}{
|
|
{"SELECT COUNT(*) FROM university_staff WHERE is_active = true", &stats.TotalStaff},
|
|
{"SELECT COUNT(*) FROM university_staff WHERE is_professor = true AND is_active = true", &stats.TotalProfessors},
|
|
{"SELECT COUNT(*) FROM publications", &stats.TotalPublications},
|
|
{"SELECT COUNT(*) FROM universities", &stats.TotalUniversities},
|
|
}
|
|
|
|
for _, q := range queries {
|
|
if err := r.db.Pool.QueryRow(ctx, q.query).Scan(q.dest); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// By state
|
|
rows, err := r.db.Pool.Query(ctx, `
|
|
SELECT COALESCE(u.state, 'unknown'), COUNT(*)
|
|
FROM university_staff s
|
|
JOIN universities u ON s.university_id = u.id
|
|
WHERE s.is_active = true
|
|
GROUP BY u.state
|
|
`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
for rows.Next() {
|
|
var state string
|
|
var count int
|
|
if err := rows.Scan(&state, &count); err != nil {
|
|
return nil, err
|
|
}
|
|
stats.ByState[state] = count
|
|
}
|
|
|
|
// By uni type
|
|
rows2, err := r.db.Pool.Query(ctx, `
|
|
SELECT COALESCE(u.uni_type, 'unknown'), COUNT(*)
|
|
FROM university_staff s
|
|
JOIN universities u ON s.university_id = u.id
|
|
WHERE s.is_active = true
|
|
GROUP BY u.uni_type
|
|
`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows2.Close()
|
|
|
|
for rows2.Next() {
|
|
var uniType string
|
|
var count int
|
|
if err := rows2.Scan(&uniType, &count); err != nil {
|
|
return nil, err
|
|
}
|
|
stats.ByUniType[uniType] = count
|
|
}
|
|
|
|
// By position type
|
|
rows3, err := r.db.Pool.Query(ctx, `
|
|
SELECT COALESCE(position_type, 'unknown'), COUNT(*)
|
|
FROM university_staff
|
|
WHERE is_active = true
|
|
GROUP BY position_type
|
|
`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows3.Close()
|
|
|
|
for rows3.Next() {
|
|
var posType string
|
|
var count int
|
|
if err := rows3.Scan(&posType, &count); err != nil {
|
|
return nil, err
|
|
}
|
|
stats.ByPositionType[posType] = count
|
|
}
|
|
|
|
return stats, nil
|
|
}
|