fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions

View File

@@ -0,0 +1,369 @@
package publications
import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"time"

	"github.com/breakpilot/edu-search-service/internal/database"
	"github.com/google/uuid"
)
// CrossRefClient is a client for the CrossRef REST API.
// Construct it with NewCrossRefClient so the polite-pool User-Agent is set.
type CrossRefClient struct {
	client    *http.Client // HTTP client; NewCrossRefClient sets a 30s timeout
	baseURL   string       // API root, normally "https://api.crossref.org"
	userAgent string       // sent on every request; embeds the contact email
	email     string       // For polite pool access
}
// CrossRefResponse represents the top-level API response envelope
// returned by CrossRef list endpoints.
type CrossRefResponse struct {
	Status         string         `json:"status"`
	MessageType    string         `json:"message-type"`
	MessageVersion string         `json:"message-version"`
	Message        CrossRefResult `json:"message"` // the actual result payload
}
// CrossRefResult contains the actual results of a works query.
type CrossRefResult struct {
	TotalResults int            `json:"total-results"` // server-side match count, not len(Items)
	Items        []CrossRefWork `json:"items"`
	Query        *CrossRefQuery `json:"query,omitempty"`
}
// CrossRefQuery echoes back query information for a search request.
type CrossRefQuery struct {
	StartIndex  int    `json:"start-index"`
	SearchTerms string `json:"search-terms"`
}
// CrossRefWork represents a single work/publication as returned by the
// CrossRef API. Several string fields arrive as arrays; convertToPub keeps
// only the first element of each.
type CrossRefWork struct {
	DOI            string           `json:"DOI"`
	Title          []string         `json:"title"`           // array per CrossRef schema; first entry used
	ContainerTitle []string         `json:"container-title"` // journal/proceedings name(s)
	Publisher      string           `json:"publisher"`
	Type           string           `json:"type"` // e.g. "journal-article"; see mapCrossRefType
	Author         []CrossRefAuthor `json:"author"`
	Issued         CrossRefDate     `json:"issued"`
	PublishedPrint CrossRefDate     `json:"published-print"`
	Abstract       string           `json:"abstract"` // may contain JATS/HTML markup; see cleanHTML
	URL            string           `json:"URL"`
	Link           []CrossRefLink   `json:"link"`
	Subject        []string         `json:"subject"`
	ISSN           []string         `json:"ISSN"`
	ISBN           []string         `json:"ISBN"`
	// Note: the Go name differs from the JSON name here — this is CrossRef's
	// "is-referenced-by-count" citation counter.
	IsCitedByCount int `json:"is-referenced-by-count"`
}
// CrossRefAuthor represents an author of a work.
type CrossRefAuthor struct {
	Given       string `json:"given"`  // given name(s)
	Family      string `json:"family"` // family name
	ORCID       string `json:"ORCID"`
	Affiliation []struct {
		Name string `json:"name"`
	} `json:"affiliation"`
	Sequence string `json:"sequence"` // "first" or "additional"
}
// CrossRefDate represents a date in CrossRef's nested "date-parts" form,
// typically [[year, month, day]] with trailing parts optional.
type CrossRefDate struct {
	DateParts [][]int `json:"date-parts"`
}
// CrossRefLink represents a full-text link attached to the work.
type CrossRefLink struct {
	URL         string `json:"URL"`
	ContentType string `json:"content-type"` // MIME type, e.g. "application/pdf"
}
// NewCrossRefClient creates a new CrossRef API client.
//
// The contact email is embedded in the User-Agent header so that requests
// qualify for CrossRef's "polite pool" service tier.
func NewCrossRefClient(email string) *CrossRefClient {
	agent := fmt.Sprintf("BreakPilot-EduBot/1.0 (https://breakpilot.de; mailto:%s)", email)
	httpClient := &http.Client{Timeout: 30 * time.Second}
	return &CrossRefClient{
		client:    httpClient,
		baseURL:   "https://api.crossref.org",
		userAgent: agent,
		email:     email,
	}
}
// GetWorkByDOI retrieves a single work by its DOI and converts it to the
// internal Publication model. Resolver URL prefixes and surrounding
// whitespace in the DOI are stripped before the lookup.
func (c *CrossRefClient) GetWorkByDOI(ctx context.Context, doi string) (*database.Publication, error) {
	// Normalize the DOI: trim whitespace and any doi.org resolver prefix.
	doi = strings.TrimSpace(doi)
	for _, prefix := range []string{"https://doi.org/", "http://doi.org/"} {
		doi = strings.TrimPrefix(doi, prefix)
	}

	endpoint := c.baseURL + "/works/" + url.PathEscape(doi)
	req, err := http.NewRequestWithContext(ctx, "GET", endpoint, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", c.userAgent)

	resp, err := c.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	switch {
	case resp.StatusCode == http.StatusNotFound:
		return nil, fmt.Errorf("DOI not found: %s", doi)
	case resp.StatusCode != http.StatusOK:
		return nil, fmt.Errorf("CrossRef API error: %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	// Single-work responses carry the work directly in "message" rather than
	// the list envelope, so decode into a local struct.
	var payload struct {
		Status  string       `json:"status"`
		Message CrossRefWork `json:"message"`
	}
	if err := json.Unmarshal(body, &payload); err != nil {
		return nil, err
	}
	return c.convertToPub(&payload.Message), nil
}
// SearchByAuthor searches for publications by author name, newest first.
// A non-positive limit defaults to 20 results.
func (c *CrossRefClient) SearchByAuthor(ctx context.Context, authorName string, limit int) ([]*database.Publication, error) {
	if limit <= 0 {
		limit = 20
	}
	query := url.QueryEscape(authorName)
	endpoint := fmt.Sprintf("%s/works?query.author=%s&rows=%d&sort=published&order=desc", c.baseURL, query, limit)
	return c.searchWorks(ctx, endpoint)
}
// SearchByAffiliation searches for publications by affiliation (university),
// newest first. A non-positive limit defaults to 20 results.
func (c *CrossRefClient) SearchByAffiliation(ctx context.Context, affiliation string, limit int) ([]*database.Publication, error) {
	if limit <= 0 {
		limit = 20
	}
	query := url.QueryEscape(affiliation)
	endpoint := fmt.Sprintf("%s/works?query.affiliation=%s&rows=%d&sort=published&order=desc", c.baseURL, query, limit)
	return c.searchWorks(ctx, endpoint)
}
// SearchByORCID searches for publications attributed to the given ORCID iD,
// newest first. A non-positive limit defaults to 100 results.
// Accepts either a bare iD (0000-0000-0000-0000) or a full orcid.org URL.
func (c *CrossRefClient) SearchByORCID(ctx context.Context, orcid string, limit int) ([]*database.Publication, error) {
	if limit <= 0 {
		limit = 100
	}
	// Strip the resolver prefix so only the bare iD is sent as a filter.
	orcid = strings.TrimPrefix(orcid, "https://orcid.org/")
	query := url.QueryEscape(orcid)
	endpoint := fmt.Sprintf("%s/works?filter=orcid:%s&rows=%d&sort=published&order=desc", c.baseURL, query, limit)
	return c.searchWorks(ctx, endpoint)
}
// SearchByTitle searches for publications by title.
// A non-positive limit defaults to 10 results.
func (c *CrossRefClient) SearchByTitle(ctx context.Context, title string, limit int) ([]*database.Publication, error) {
	if limit <= 0 {
		limit = 10
	}
	query := url.QueryEscape(title)
	endpoint := fmt.Sprintf("%s/works?query.title=%s&rows=%d", c.baseURL, query, limit)
	return c.searchWorks(ctx, endpoint)
}
// searchWorks performs a generic GET against the given endpoint and converts
// every returned work into a Publication. Returns a nil slice when the
// response contains no items.
func (c *CrossRefClient) searchWorks(ctx context.Context, endpoint string) ([]*database.Publication, error) {
	req, err := http.NewRequestWithContext(ctx, "GET", endpoint, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", c.userAgent)

	resp, err := c.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("CrossRef API error: %d", resp.StatusCode)
	}

	raw, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	var parsed CrossRefResponse
	if err := json.Unmarshal(raw, &parsed); err != nil {
		return nil, err
	}

	var pubs []*database.Publication
	items := parsed.Message.Items
	for i := range items {
		pubs = append(pubs, c.convertToPub(&items[i]))
	}
	return pubs, nil
}
// convertToPub converts a CrossRef work to our Publication model.
//
// Optional fields are stored as pointers. Note that several of them (DOI,
// URL, Publisher, ISBN, ISSN, PDF link) point directly into the passed-in
// work, so the work should not be mutated after conversion.
func (c *CrossRefClient) convertToPub(work *CrossRefWork) *database.Publication {
	pub := &database.Publication{
		ID:            uuid.New(),
		CitationCount: work.IsCitedByCount,
		CrawledAt:     time.Now(),
	}
	// Title: CrossRef returns titles as an array; keep the first entry.
	if len(work.Title) > 0 {
		pub.Title = work.Title[0]
	}
	// DOI
	if work.DOI != "" {
		pub.DOI = &work.DOI
	}
	// URL
	if work.URL != "" {
		pub.URL = &work.URL
	}
	// Abstract (clean HTML/JATS markup before storing)
	if work.Abstract != "" {
		abstract := cleanHTML(work.Abstract)
		pub.Abstract = &abstract
	}
	// Year/month come from the nested "date-parts" array: [[year, month, day]],
	// where month and day may be absent.
	if len(work.Issued.DateParts) > 0 && len(work.Issued.DateParts[0]) > 0 {
		year := work.Issued.DateParts[0][0]
		pub.Year = &year
		if len(work.Issued.DateParts[0]) > 1 {
			month := work.Issued.DateParts[0][1]
			pub.Month = &month
		}
	}
	// Type: normalize CrossRef's type vocabulary to the internal one.
	pubType := mapCrossRefType(work.Type)
	pub.PubType = &pubType
	// Venue: first container title (journal or proceedings name).
	if len(work.ContainerTitle) > 0 {
		venue := work.ContainerTitle[0]
		pub.Venue = &venue
	}
	// Publisher
	if work.Publisher != "" {
		pub.Publisher = &work.Publisher
	}
	// ISBN: keep only the first identifier.
	if len(work.ISBN) > 0 {
		pub.ISBN = &work.ISBN[0]
	}
	// ISSN: keep only the first identifier.
	if len(work.ISSN) > 0 {
		pub.ISSN = &work.ISSN[0]
	}
	// Keywords/Subjects
	if len(work.Subject) > 0 {
		pub.Keywords = work.Subject
	}
	// PDF URL: first link whose content type mentions "pdf".
	for _, link := range work.Link {
		if strings.Contains(link.ContentType, "pdf") {
			pub.PDFURL = &link.URL
			break
		}
	}
	// Authors: "Given Family", skipping entries where both parts are empty.
	var authors []string
	for _, author := range work.Author {
		name := strings.TrimSpace(author.Given + " " + author.Family)
		if name != "" {
			authors = append(authors, name)
		}
	}
	pub.Authors = authors
	// Source
	source := "crossref"
	pub.Source = &source
	// Store the raw CrossRef record for later reprocessing; marshaling this
	// in-memory struct cannot realistically fail, so the error is ignored.
	rawData, _ := json.Marshal(work)
	pub.RawData = rawData
	return pub
}
// mapCrossRefType maps CrossRef work types onto the internal publication
// type vocabulary. Unrecognized types collapse to "other".
func mapCrossRefType(crType string) string {
	typeTable := map[string]string{
		"journal-article":     "journal",
		"proceedings-article": "conference",
		"conference-paper":    "conference",
		"book":                "book",
		"book-chapter":        "book_chapter",
		"dissertation":        "thesis",
		"posted-content":      "preprint",
	}
	if mapped, ok := typeTable[crType]; ok {
		return mapped
	}
	return "other"
}
// tagPattern matches any HTML/XML/JATS tag such as <jats:p>, </p> or <i>.
// Compiled once at package scope; never in the (hot) conversion path.
var tagPattern = regexp.MustCompile(`</?[A-Za-z][^>]*>`)

// cleanHTML strips HTML/JATS markup from abstract text and collapses runs of
// whitespace into single spaces.
//
// Closing paragraph tags are turned into spaces first so adjacent paragraphs
// do not run together; all remaining tags — including ones the previous
// implementation did not enumerate, e.g. <jats:sup> or <i> — are then
// removed wholesale. Returns "" for input that is only markup/whitespace.
func cleanHTML(html string) string {
	// Paragraph boundaries become word boundaries.
	result := strings.ReplaceAll(html, "</jats:p>", " ")
	result = strings.ReplaceAll(result, "</p>", " ")
	// Drop every other tag instead of enumerating known ones.
	result = tagPattern.ReplaceAllString(result, "")
	// Collapse whitespace (Fields+Join also trims both ends).
	return strings.Join(strings.Fields(result), " ")
}

View File

@@ -0,0 +1,268 @@
package publications
import (
	"context"
	"fmt"
	"log"
	"strings"
	"sync"
	"time"

	"github.com/breakpilot/edu-search-service/internal/database"
	"github.com/google/uuid"
)
// PublicationCrawler crawls publications for university staff, querying
// CrossRef while enforcing a client-side minimum spacing between requests.
// Contains a mutex, so it must not be copied; use pointers.
type PublicationCrawler struct {
	repo        *database.Repository // persistence for publications and staff links
	crossref    *CrossRefClient      // CrossRef API client
	rateLimit   time.Duration        // minimum spacing between CrossRef requests
	mu          sync.Mutex           // guards lastRequest across concurrent crawls
	lastRequest time.Time            // time of the most recent CrossRef request
}
// CrawlResult contains the result of a publication crawl for one staff member.
type CrawlResult struct {
	StaffID     uuid.UUID
	PubsFound   int      // publications successfully saved
	PubsNew     int      // reserved; not populated by CrawlForStaff in this file
	PubsUpdated int      // reserved; not populated by CrawlForStaff in this file
	Errors      []string // non-fatal errors collected during the crawl
	Duration    time.Duration
}
// NewPublicationCrawler creates a new publication crawler.
//
// The email is forwarded to the CrossRef client for polite-pool access. The
// crawler spaces requests at least one second apart — a conservative rate,
// well below the polite pool's documented ceiling.
func NewPublicationCrawler(repo *database.Repository, email string) *PublicationCrawler {
	crawler := &PublicationCrawler{
		repo:      repo,
		crossref:  NewCrossRefClient(email),
		rateLimit: time.Second,
	}
	return crawler
}
// CrawlForStaff crawls publications for a single staff member.
//
// It tries ORCID search first (most reliable), then augments the result set
// with a plain author-name search, de-duplicating by DOI/title. Every found
// publication is persisted and linked to the staff record. Per-publication
// failures are collected in result.Errors rather than aborting the crawl.
//
// Fix: the original dereferenced staff.FullName unconditionally for logging,
// panicking on a nil FullName even though the search below treats it as
// optional; logging now uses a nil-safe display name.
func (c *PublicationCrawler) CrawlForStaff(ctx context.Context, staff *database.UniversityStaff) (*CrawlResult, error) {
	start := time.Now()
	result := &CrawlResult{
		StaffID: staff.ID,
	}

	// FullName is a pointer and may be nil; never dereference it blindly.
	displayName := staff.LastName
	if staff.FullName != nil && *staff.FullName != "" {
		displayName = *staff.FullName
	}
	log.Printf("Starting publication crawl for %s", displayName)

	var pubs []*database.Publication

	// Strategy 1: Search by ORCID (most reliable)
	if staff.ORCID != nil && *staff.ORCID != "" {
		c.waitForRateLimit()
		orcidPubs, err := c.crossref.SearchByORCID(ctx, *staff.ORCID, 100)
		if err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("ORCID search error: %v", err))
		} else {
			pubs = append(pubs, orcidPubs...)
			log.Printf("Found %d publications via ORCID for %s", len(orcidPubs), displayName)
		}
	}

	// Strategy 2: Search by author name
	if staff.FullName != nil && *staff.FullName != "" {
		c.waitForRateLimit()
		namePubs, err := c.crossref.SearchByAuthor(ctx, *staff.FullName, 50)
		if err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("Name search error: %v", err))
		} else {
			// Deduplicate against the ORCID results.
			for _, pub := range namePubs {
				if !containsPub(pubs, pub) {
					pubs = append(pubs, pub)
				}
			}
			log.Printf("Found %d additional publications via name search for %s", len(namePubs), displayName)
		}
	}

	// Save publications and link them to the staff member.
	for _, pub := range pubs {
		if err := c.repo.CreatePublication(ctx, pub); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("Save error for %s: %v", pub.Title, err))
			continue
		}
		result.PubsFound++

		link := &database.StaffPublication{
			StaffID:       staff.ID,
			PublicationID: pub.ID,
		}
		// Record the staff member's 1-based position in the author list, if found.
		if pos := findAuthorPosition(pub, staff); pos > 0 {
			link.AuthorPosition = &pos
		}
		if err := c.repo.LinkStaffPublication(ctx, link); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("Link error: %v", err))
		}
	}

	result.Duration = time.Since(start)
	log.Printf("Completed publication crawl for %s: found=%d, duration=%v",
		displayName, result.PubsFound, result.Duration)
	return result, nil
}
// CrawlForUniversity crawls publications for all staff at a university and
// records the resulting crawl status in the database.
func (c *PublicationCrawler) CrawlForUniversity(ctx context.Context, uniID uuid.UUID, limit int) (*database.UniversityCrawlStatus, error) {
	log.Printf("Starting publication crawl for university %s", uniID)

	// Load the staff list for this university.
	searchParams := database.StaffSearchParams{
		UniversityID: &uniID,
		Limit:        limit,
	}
	staffResult, err := c.repo.SearchStaff(ctx, searchParams)
	if err != nil {
		return nil, err
	}

	status := &database.UniversityCrawlStatus{
		UniversityID:   uniID,
		PubCrawlStatus: "running",
	}

	totalPubs := 0
	var crawlErrors []string
	for i := range staffResult.Staff {
		member := &staffResult.Staff[i]

		// Bail out promptly when the context is cancelled.
		select {
		case <-ctx.Done():
			status.PubCrawlStatus = "cancelled"
			status.PubErrors = append(crawlErrors, "Crawl cancelled")
			return status, ctx.Err()
		default:
		}

		crawlResult, crawlErr := c.CrawlForStaff(ctx, member)
		if crawlErr != nil {
			crawlErrors = append(crawlErrors, fmt.Sprintf("%s: %v", member.LastName, crawlErr))
			continue
		}
		totalPubs += crawlResult.PubsFound
		crawlErrors = append(crawlErrors, crawlResult.Errors...)
	}

	now := time.Now()
	status.LastPubCrawl = &now
	status.PubCrawlStatus = "completed"
	status.PubCount = totalPubs
	status.PubErrors = crawlErrors

	// Persist the final status; a failure here is logged but not fatal.
	if err := c.repo.UpdateCrawlStatus(ctx, status); err != nil {
		log.Printf("Warning: Failed to update crawl status: %v", err)
	}

	log.Printf("Completed publication crawl for university %s: %d publications found", uniID, totalPubs)
	return status, nil
}
// ResolveDOI resolves a DOI via CrossRef and persists the resulting
// publication, honoring the crawler's rate limit.
func (c *PublicationCrawler) ResolveDOI(ctx context.Context, doi string) (*database.Publication, error) {
	c.waitForRateLimit()

	pub, lookupErr := c.crossref.GetWorkByDOI(ctx, doi)
	if lookupErr != nil {
		return nil, lookupErr
	}
	if saveErr := c.repo.CreatePublication(ctx, pub); saveErr != nil {
		return nil, saveErr
	}
	return pub, nil
}
// waitForRateLimit blocks until at least rateLimit has elapsed since the
// previous request; the mutex serializes concurrent callers so the spacing
// holds across goroutines.
func (c *PublicationCrawler) waitForRateLimit() {
	c.mu.Lock()
	defer c.mu.Unlock()

	if wait := c.rateLimit - time.Since(c.lastRequest); wait > 0 {
		time.Sleep(wait)
	}
	c.lastRequest = time.Now()
}
// containsPub reports whether pub is already present in pubs, matching first
// by DOI and then by exact title.
//
// Fix: two publications without titles previously matched each other
// ("" == ""), so distinct untitled records were wrongly treated as
// duplicates; empty titles are now excluded from the title comparison.
func containsPub(pubs []*database.Publication, pub *database.Publication) bool {
	for _, existing := range pubs {
		// Match by DOI when both sides have one.
		if pub.DOI != nil && existing.DOI != nil && *pub.DOI == *existing.DOI {
			return true
		}
		// Match by title (exact), but never treat two untitled records as equal.
		if pub.Title != "" && pub.Title == existing.Title {
			return true
		}
	}
	return false
}
// findAuthorPosition returns the 1-based position of the staff member in the
// publication's author list, matching case-insensitively on the last name.
// Returns 0 when the last name is empty or no author matches.
func findAuthorPosition(pub *database.Publication, staff *database.UniversityStaff) int {
	if staff.LastName == "" {
		return 0
	}
	for idx, authorName := range pub.Authors {
		if containsIgnoreCase(authorName, staff.LastName) {
			return idx + 1
		}
	}
	return 0
}
// containsIgnoreCase reports whether s contains substr, ignoring case.
//
// Replaces the previous convoluted hand-rolled ASCII-only scan with the
// standard library; as a side effect, non-ASCII letters (e.g. "Ü" vs "ü")
// now fold correctly too.
func containsIgnoreCase(s, substr string) bool {
	return strings.Contains(strings.ToLower(s), strings.ToLower(substr))
}
// containsIgnoreCaseHelper scans s for an ASCII-case-insensitive occurrence
// of substr by comparing each window of len(substr) bytes in place.
func containsIgnoreCaseHelper(s, substr string) bool {
	for start := 0; start <= len(s)-len(substr); start++ {
		matched := true
		for off := 0; off < len(substr); off++ {
			a, b := s[start+off], substr[off]
			// Simple ASCII case folding, byte by byte.
			if 'A' <= a && a <= 'Z' {
				a += 'a' - 'A'
			}
			if 'A' <= b && b <= 'Z' {
				b += 'a' - 'A'
			}
			if a != b {
				matched = false
				break
			}
		}
		if matched {
			return true
		}
	}
	return false
}
// equalFold reports whether s1 and s2 are equal under simple ASCII case
// folding (bytes outside A-Z/a-z must match exactly).
func equalFold(s1, s2 string) bool {
	if len(s1) != len(s2) {
		return false
	}
	lower := func(c byte) byte {
		if 'A' <= c && c <= 'Z' {
			return c + ('a' - 'A')
		}
		return c
	}
	for i := 0; i < len(s1); i++ {
		if lower(s1[i]) != lower(s2[i]) {
			return false
		}
	}
	return true
}

View File

@@ -0,0 +1,188 @@
package publications
import (
"testing"
"github.com/breakpilot/edu-search-service/internal/database"
)
// TestContainsPub_ByDOI verifies that duplicate detection matches on DOI
// regardless of title.
func TestContainsPub_ByDOI(t *testing.T) {
	knownDOI := "10.1000/test1"
	otherKnownDOI := "10.1000/test2"
	unknownDOI := "10.1000/test3"

	existing := []*database.Publication{
		{Title: "Paper 1", DOI: &knownDOI},
		{Title: "Paper 2", DOI: &otherKnownDOI},
	}

	cases := []struct {
		name string
		pub  *database.Publication
		want bool
	}{
		{"DOI exists in list", &database.Publication{Title: "Different Title", DOI: &knownDOI}, true},
		{"DOI does not exist", &database.Publication{Title: "New Paper", DOI: &unknownDOI}, false},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := containsPub(existing, tc.pub); got != tc.want {
				t.Errorf("containsPub() = %v, want %v", got, tc.want)
			}
		})
	}
}
// TestContainsPub_ByTitle verifies that duplicate detection falls back to
// exact title matching when no DOI is available.
func TestContainsPub_ByTitle(t *testing.T) {
	existing := []*database.Publication{
		{Title: "Machine Learning Applications"},
		{Title: "Deep Neural Networks"},
	}

	cases := []struct {
		name string
		pub  *database.Publication
		want bool
	}{
		{"Title exists in list", &database.Publication{Title: "Machine Learning Applications"}, true},
		{"Title does not exist", &database.Publication{Title: "New Research Paper"}, false},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := containsPub(existing, tc.pub); got != tc.want {
				t.Errorf("containsPub() = %v, want %v", got, tc.want)
			}
		})
	}
}
// TestContainsIgnoreCase exercises case-insensitive substring matching,
// including the empty-string edge cases.
func TestContainsIgnoreCase(t *testing.T) {
	cases := []struct {
		name     string
		haystack string
		needle   string
		want     bool
	}{
		{"Exact match", "Hello World", "Hello", true},
		{"Case insensitive", "Hello World", "hello", true},
		{"Case insensitive uppercase", "HELLO WORLD", "world", true},
		{"Substring in middle", "The quick brown fox", "brown", true},
		{"No match", "Hello World", "xyz", false},
		{"Empty substring", "Hello", "", true},
		{"Empty string", "", "test", false},
		{"Both empty", "", "", true},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := containsIgnoreCase(tc.haystack, tc.needle); got != tc.want {
				t.Errorf("containsIgnoreCase(%q, %q) = %v, expected %v",
					tc.haystack, tc.needle, got, tc.want)
			}
		})
	}
}
// TestEqualFold exercises ASCII case-insensitive string equality.
func TestEqualFold(t *testing.T) {
	cases := []struct {
		name  string
		left  string
		right string
		want  bool
	}{
		{"Same string", "hello", "hello", true},
		{"Different case", "Hello", "hello", true},
		{"All uppercase", "HELLO", "hello", true},
		{"Mixed case", "HeLLo", "hEllO", true},
		{"Different strings", "hello", "world", false},
		{"Different length", "hello", "hi", false},
		{"Empty strings", "", "", true},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := equalFold(tc.left, tc.right); got != tc.want {
				t.Errorf("equalFold(%q, %q) = %v, expected %v",
					tc.left, tc.right, got, tc.want)
			}
		})
	}
}
// TestFindAuthorPosition verifies the 1-based author position lookup,
// including the not-found case (0).
func TestFindAuthorPosition(t *testing.T) {
	pub := &database.Publication{
		Title:   "Test Paper",
		Authors: []string{"John Smith", "Maria Müller", "Hans Weber"},
	}

	cases := []struct {
		name     string
		lastName string
		want     int
	}{
		{"First author", "Smith", 1},
		{"Second author", "Müller", 2},
		{"Third author", "Weber", 3},
		{"Author not found", "Unknown", 0},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			staff := &database.UniversityStaff{LastName: tc.lastName}
			if got := findAuthorPosition(pub, staff); got != tc.want {
				t.Errorf("Expected position %d, got %d for author %s",
					tc.want, got, tc.lastName)
			}
		})
	}
}