fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
369
edu-search-service/internal/publications/crossref_client.go
Normal file
369
edu-search-service/internal/publications/crossref_client.go
Normal file
@@ -0,0 +1,369 @@
|
||||
package publications
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/breakpilot/edu-search-service/internal/database"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// CrossRefClient is a client for the CrossRef API
|
||||
type CrossRefClient struct {
|
||||
client *http.Client
|
||||
baseURL string
|
||||
userAgent string
|
||||
email string // For polite pool access
|
||||
}
|
||||
|
||||
// CrossRefResponse represents the top-level API response
|
||||
type CrossRefResponse struct {
|
||||
Status string `json:"status"`
|
||||
MessageType string `json:"message-type"`
|
||||
MessageVersion string `json:"message-version"`
|
||||
Message CrossRefResult `json:"message"`
|
||||
}
|
||||
|
||||
// CrossRefResult contains the actual results
|
||||
type CrossRefResult struct {
|
||||
TotalResults int `json:"total-results"`
|
||||
Items []CrossRefWork `json:"items"`
|
||||
Query *CrossRefQuery `json:"query,omitempty"`
|
||||
}
|
||||
|
||||
// CrossRefQuery contains query info
|
||||
type CrossRefQuery struct {
|
||||
StartIndex int `json:"start-index"`
|
||||
SearchTerms string `json:"search-terms"`
|
||||
}
|
||||
|
||||
// CrossRefWork represents a single work/publication
|
||||
type CrossRefWork struct {
|
||||
DOI string `json:"DOI"`
|
||||
Title []string `json:"title"`
|
||||
ContainerTitle []string `json:"container-title"`
|
||||
Publisher string `json:"publisher"`
|
||||
Type string `json:"type"`
|
||||
Author []CrossRefAuthor `json:"author"`
|
||||
Issued CrossRefDate `json:"issued"`
|
||||
PublishedPrint CrossRefDate `json:"published-print"`
|
||||
Abstract string `json:"abstract"`
|
||||
URL string `json:"URL"`
|
||||
Link []CrossRefLink `json:"link"`
|
||||
Subject []string `json:"subject"`
|
||||
ISSN []string `json:"ISSN"`
|
||||
ISBN []string `json:"ISBN"`
|
||||
IsCitedByCount int `json:"is-referenced-by-count"`
|
||||
}
|
||||
|
||||
// CrossRefAuthor represents an author
|
||||
type CrossRefAuthor struct {
|
||||
Given string `json:"given"`
|
||||
Family string `json:"family"`
|
||||
ORCID string `json:"ORCID"`
|
||||
Affiliation []struct {
|
||||
Name string `json:"name"`
|
||||
} `json:"affiliation"`
|
||||
Sequence string `json:"sequence"` // "first" or "additional"
|
||||
}
|
||||
|
||||
// CrossRefDate represents a date
|
||||
type CrossRefDate struct {
|
||||
DateParts [][]int `json:"date-parts"`
|
||||
}
|
||||
|
||||
// CrossRefLink represents a link to the work
|
||||
type CrossRefLink struct {
|
||||
URL string `json:"URL"`
|
||||
ContentType string `json:"content-type"`
|
||||
}
|
||||
|
||||
// NewCrossRefClient creates a new CrossRef API client
|
||||
func NewCrossRefClient(email string) *CrossRefClient {
|
||||
return &CrossRefClient{
|
||||
client: &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
},
|
||||
baseURL: "https://api.crossref.org",
|
||||
userAgent: "BreakPilot-EduBot/1.0 (https://breakpilot.de; mailto:" + email + ")",
|
||||
email: email,
|
||||
}
|
||||
}
|
||||
|
||||
// GetWorkByDOI retrieves a work by its DOI
|
||||
func (c *CrossRefClient) GetWorkByDOI(ctx context.Context, doi string) (*database.Publication, error) {
|
||||
// Clean DOI
|
||||
doi = strings.TrimSpace(doi)
|
||||
doi = strings.TrimPrefix(doi, "https://doi.org/")
|
||||
doi = strings.TrimPrefix(doi, "http://doi.org/")
|
||||
|
||||
endpoint := fmt.Sprintf("%s/works/%s", c.baseURL, url.PathEscape(doi))
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", endpoint, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", c.userAgent)
|
||||
|
||||
resp, err := c.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusNotFound {
|
||||
return nil, fmt.Errorf("DOI not found: %s", doi)
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("CrossRef API error: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Status string `json:"status"`
|
||||
Message CrossRefWork `json:"message"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return c.convertToPub(&result.Message), nil
|
||||
}
|
||||
|
||||
// SearchByAuthor searches for publications by author name
|
||||
func (c *CrossRefClient) SearchByAuthor(ctx context.Context, authorName string, limit int) ([]*database.Publication, error) {
|
||||
if limit <= 0 {
|
||||
limit = 20
|
||||
}
|
||||
|
||||
endpoint := fmt.Sprintf("%s/works?query.author=%s&rows=%d&sort=published&order=desc",
|
||||
c.baseURL, url.QueryEscape(authorName), limit)
|
||||
|
||||
return c.searchWorks(ctx, endpoint)
|
||||
}
|
||||
|
||||
// SearchByAffiliation searches for publications by affiliation (university)
|
||||
func (c *CrossRefClient) SearchByAffiliation(ctx context.Context, affiliation string, limit int) ([]*database.Publication, error) {
|
||||
if limit <= 0 {
|
||||
limit = 20
|
||||
}
|
||||
|
||||
endpoint := fmt.Sprintf("%s/works?query.affiliation=%s&rows=%d&sort=published&order=desc",
|
||||
c.baseURL, url.QueryEscape(affiliation), limit)
|
||||
|
||||
return c.searchWorks(ctx, endpoint)
|
||||
}
|
||||
|
||||
// SearchByORCID searches for publications by ORCID
|
||||
func (c *CrossRefClient) SearchByORCID(ctx context.Context, orcid string, limit int) ([]*database.Publication, error) {
|
||||
if limit <= 0 {
|
||||
limit = 100
|
||||
}
|
||||
|
||||
// ORCID format: 0000-0000-0000-0000
|
||||
orcid = strings.TrimPrefix(orcid, "https://orcid.org/")
|
||||
|
||||
endpoint := fmt.Sprintf("%s/works?filter=orcid:%s&rows=%d&sort=published&order=desc",
|
||||
c.baseURL, url.QueryEscape(orcid), limit)
|
||||
|
||||
return c.searchWorks(ctx, endpoint)
|
||||
}
|
||||
|
||||
// SearchByTitle searches for publications by title
|
||||
func (c *CrossRefClient) SearchByTitle(ctx context.Context, title string, limit int) ([]*database.Publication, error) {
|
||||
if limit <= 0 {
|
||||
limit = 10
|
||||
}
|
||||
|
||||
endpoint := fmt.Sprintf("%s/works?query.title=%s&rows=%d",
|
||||
c.baseURL, url.QueryEscape(title), limit)
|
||||
|
||||
return c.searchWorks(ctx, endpoint)
|
||||
}
|
||||
|
||||
// searchWorks performs a generic search
|
||||
func (c *CrossRefClient) searchWorks(ctx context.Context, endpoint string) ([]*database.Publication, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", endpoint, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", c.userAgent)
|
||||
|
||||
resp, err := c.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("CrossRef API error: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var result CrossRefResponse
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var pubs []*database.Publication
|
||||
for _, work := range result.Message.Items {
|
||||
pubs = append(pubs, c.convertToPub(&work))
|
||||
}
|
||||
|
||||
return pubs, nil
|
||||
}
|
||||
|
||||
// convertToPub converts a CrossRef work to our Publication model
|
||||
func (c *CrossRefClient) convertToPub(work *CrossRefWork) *database.Publication {
|
||||
pub := &database.Publication{
|
||||
ID: uuid.New(),
|
||||
CitationCount: work.IsCitedByCount,
|
||||
CrawledAt: time.Now(),
|
||||
}
|
||||
|
||||
// Title
|
||||
if len(work.Title) > 0 {
|
||||
pub.Title = work.Title[0]
|
||||
}
|
||||
|
||||
// DOI
|
||||
if work.DOI != "" {
|
||||
pub.DOI = &work.DOI
|
||||
}
|
||||
|
||||
// URL
|
||||
if work.URL != "" {
|
||||
pub.URL = &work.URL
|
||||
}
|
||||
|
||||
// Abstract (clean HTML)
|
||||
if work.Abstract != "" {
|
||||
abstract := cleanHTML(work.Abstract)
|
||||
pub.Abstract = &abstract
|
||||
}
|
||||
|
||||
// Year
|
||||
if len(work.Issued.DateParts) > 0 && len(work.Issued.DateParts[0]) > 0 {
|
||||
year := work.Issued.DateParts[0][0]
|
||||
pub.Year = &year
|
||||
if len(work.Issued.DateParts[0]) > 1 {
|
||||
month := work.Issued.DateParts[0][1]
|
||||
pub.Month = &month
|
||||
}
|
||||
}
|
||||
|
||||
// Type
|
||||
pubType := mapCrossRefType(work.Type)
|
||||
pub.PubType = &pubType
|
||||
|
||||
// Venue
|
||||
if len(work.ContainerTitle) > 0 {
|
||||
venue := work.ContainerTitle[0]
|
||||
pub.Venue = &venue
|
||||
}
|
||||
|
||||
// Publisher
|
||||
if work.Publisher != "" {
|
||||
pub.Publisher = &work.Publisher
|
||||
}
|
||||
|
||||
// ISBN
|
||||
if len(work.ISBN) > 0 {
|
||||
pub.ISBN = &work.ISBN[0]
|
||||
}
|
||||
|
||||
// ISSN
|
||||
if len(work.ISSN) > 0 {
|
||||
pub.ISSN = &work.ISSN[0]
|
||||
}
|
||||
|
||||
// Keywords/Subjects
|
||||
if len(work.Subject) > 0 {
|
||||
pub.Keywords = work.Subject
|
||||
}
|
||||
|
||||
// PDF URL
|
||||
for _, link := range work.Link {
|
||||
if strings.Contains(link.ContentType, "pdf") {
|
||||
pub.PDFURL = &link.URL
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Authors
|
||||
var authors []string
|
||||
for _, author := range work.Author {
|
||||
name := strings.TrimSpace(author.Given + " " + author.Family)
|
||||
if name != "" {
|
||||
authors = append(authors, name)
|
||||
}
|
||||
}
|
||||
pub.Authors = authors
|
||||
|
||||
// Source
|
||||
source := "crossref"
|
||||
pub.Source = &source
|
||||
|
||||
// Store raw data
|
||||
rawData, _ := json.Marshal(work)
|
||||
pub.RawData = rawData
|
||||
|
||||
return pub
|
||||
}
|
||||
|
||||
// mapCrossRefType maps CrossRef types to our types
|
||||
func mapCrossRefType(crType string) string {
|
||||
switch crType {
|
||||
case "journal-article":
|
||||
return "journal"
|
||||
case "proceedings-article", "conference-paper":
|
||||
return "conference"
|
||||
case "book":
|
||||
return "book"
|
||||
case "book-chapter":
|
||||
return "book_chapter"
|
||||
case "dissertation":
|
||||
return "thesis"
|
||||
case "posted-content":
|
||||
return "preprint"
|
||||
default:
|
||||
return "other"
|
||||
}
|
||||
}
|
||||
|
||||
// cleanHTML removes HTML tags from text
|
||||
func cleanHTML(html string) string {
|
||||
// Simple HTML tag removal
|
||||
result := html
|
||||
result = strings.ReplaceAll(result, "<jats:p>", "")
|
||||
result = strings.ReplaceAll(result, "</jats:p>", " ")
|
||||
result = strings.ReplaceAll(result, "<jats:italic>", "")
|
||||
result = strings.ReplaceAll(result, "</jats:italic>", "")
|
||||
result = strings.ReplaceAll(result, "<jats:bold>", "")
|
||||
result = strings.ReplaceAll(result, "</jats:bold>", "")
|
||||
result = strings.ReplaceAll(result, "<p>", "")
|
||||
result = strings.ReplaceAll(result, "</p>", " ")
|
||||
|
||||
// Collapse whitespace
|
||||
result = strings.Join(strings.Fields(result), " ")
|
||||
|
||||
return strings.TrimSpace(result)
|
||||
}
|
||||
268
edu-search-service/internal/publications/pub_crawler.go
Normal file
268
edu-search-service/internal/publications/pub_crawler.go
Normal file
@@ -0,0 +1,268 @@
|
||||
package publications
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/breakpilot/edu-search-service/internal/database"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// PublicationCrawler crawls publications for university staff
|
||||
type PublicationCrawler struct {
|
||||
repo *database.Repository
|
||||
crossref *CrossRefClient
|
||||
rateLimit time.Duration
|
||||
mu sync.Mutex
|
||||
lastRequest time.Time
|
||||
}
|
||||
|
||||
// CrawlResult contains the result of a publication crawl
|
||||
type CrawlResult struct {
|
||||
StaffID uuid.UUID
|
||||
PubsFound int
|
||||
PubsNew int
|
||||
PubsUpdated int
|
||||
Errors []string
|
||||
Duration time.Duration
|
||||
}
|
||||
|
||||
// NewPublicationCrawler creates a new publication crawler
|
||||
func NewPublicationCrawler(repo *database.Repository, email string) *PublicationCrawler {
|
||||
return &PublicationCrawler{
|
||||
repo: repo,
|
||||
crossref: NewCrossRefClient(email),
|
||||
rateLimit: time.Second, // CrossRef polite pool: 50 req/sec max
|
||||
}
|
||||
}
|
||||
|
||||
// CrawlForStaff crawls publications for a single staff member
|
||||
func (c *PublicationCrawler) CrawlForStaff(ctx context.Context, staff *database.UniversityStaff) (*CrawlResult, error) {
|
||||
start := time.Now()
|
||||
result := &CrawlResult{
|
||||
StaffID: staff.ID,
|
||||
}
|
||||
|
||||
log.Printf("Starting publication crawl for %s", *staff.FullName)
|
||||
|
||||
var pubs []*database.Publication
|
||||
|
||||
// Strategy 1: Search by ORCID (most reliable)
|
||||
if staff.ORCID != nil && *staff.ORCID != "" {
|
||||
c.waitForRateLimit()
|
||||
orcidPubs, err := c.crossref.SearchByORCID(ctx, *staff.ORCID, 100)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("ORCID search error: %v", err))
|
||||
} else {
|
||||
pubs = append(pubs, orcidPubs...)
|
||||
log.Printf("Found %d publications via ORCID for %s", len(orcidPubs), *staff.FullName)
|
||||
}
|
||||
}
|
||||
|
||||
// Strategy 2: Search by author name
|
||||
if staff.FullName != nil && *staff.FullName != "" {
|
||||
c.waitForRateLimit()
|
||||
namePubs, err := c.crossref.SearchByAuthor(ctx, *staff.FullName, 50)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("Name search error: %v", err))
|
||||
} else {
|
||||
// Deduplicate
|
||||
for _, pub := range namePubs {
|
||||
if !containsPub(pubs, pub) {
|
||||
pubs = append(pubs, pub)
|
||||
}
|
||||
}
|
||||
log.Printf("Found %d additional publications via name search for %s", len(namePubs), *staff.FullName)
|
||||
}
|
||||
}
|
||||
|
||||
// Save publications and create links
|
||||
for _, pub := range pubs {
|
||||
// Save publication
|
||||
err := c.repo.CreatePublication(ctx, pub)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("Save error for %s: %v", pub.Title, err))
|
||||
continue
|
||||
}
|
||||
|
||||
result.PubsFound++
|
||||
|
||||
// Link to staff
|
||||
link := &database.StaffPublication{
|
||||
StaffID: staff.ID,
|
||||
PublicationID: pub.ID,
|
||||
}
|
||||
|
||||
// Determine author position
|
||||
pos := findAuthorPosition(pub, staff)
|
||||
if pos > 0 {
|
||||
link.AuthorPosition = &pos
|
||||
}
|
||||
|
||||
if err := c.repo.LinkStaffPublication(ctx, link); err != nil {
|
||||
result.Errors = append(result.Errors, fmt.Sprintf("Link error: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
result.Duration = time.Since(start)
|
||||
|
||||
log.Printf("Completed publication crawl for %s: found=%d, duration=%v",
|
||||
*staff.FullName, result.PubsFound, result.Duration)
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// CrawlForUniversity crawls publications for all staff at a university
|
||||
func (c *PublicationCrawler) CrawlForUniversity(ctx context.Context, uniID uuid.UUID, limit int) (*database.UniversityCrawlStatus, error) {
|
||||
log.Printf("Starting publication crawl for university %s", uniID)
|
||||
|
||||
// Get staff with ORCID first (more reliable)
|
||||
params := database.StaffSearchParams{
|
||||
UniversityID: &uniID,
|
||||
Limit: limit,
|
||||
}
|
||||
|
||||
result, err := c.repo.SearchStaff(ctx, params)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
status := &database.UniversityCrawlStatus{
|
||||
UniversityID: uniID,
|
||||
PubCrawlStatus: "running",
|
||||
}
|
||||
|
||||
var totalPubs int
|
||||
var errors []string
|
||||
|
||||
for _, staff := range result.Staff {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
status.PubCrawlStatus = "cancelled"
|
||||
status.PubErrors = append(errors, "Crawl cancelled")
|
||||
return status, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
crawlResult, err := c.CrawlForStaff(ctx, &staff)
|
||||
if err != nil {
|
||||
errors = append(errors, fmt.Sprintf("%s: %v", staff.LastName, err))
|
||||
continue
|
||||
}
|
||||
|
||||
totalPubs += crawlResult.PubsFound
|
||||
errors = append(errors, crawlResult.Errors...)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
status.LastPubCrawl = &now
|
||||
status.PubCrawlStatus = "completed"
|
||||
status.PubCount = totalPubs
|
||||
status.PubErrors = errors
|
||||
|
||||
// Update status in database
|
||||
if err := c.repo.UpdateCrawlStatus(ctx, status); err != nil {
|
||||
log.Printf("Warning: Failed to update crawl status: %v", err)
|
||||
}
|
||||
|
||||
log.Printf("Completed publication crawl for university %s: %d publications found", uniID, totalPubs)
|
||||
|
||||
return status, nil
|
||||
}
|
||||
|
||||
// ResolveDOI resolves a DOI and saves the publication
|
||||
func (c *PublicationCrawler) ResolveDOI(ctx context.Context, doi string) (*database.Publication, error) {
|
||||
c.waitForRateLimit()
|
||||
|
||||
pub, err := c.crossref.GetWorkByDOI(ctx, doi)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := c.repo.CreatePublication(ctx, pub); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return pub, nil
|
||||
}
|
||||
|
||||
// waitForRateLimit enforces rate limiting
|
||||
func (c *PublicationCrawler) waitForRateLimit() {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
elapsed := time.Since(c.lastRequest)
|
||||
if elapsed < c.rateLimit {
|
||||
time.Sleep(c.rateLimit - elapsed)
|
||||
}
|
||||
|
||||
c.lastRequest = time.Now()
|
||||
}
|
||||
|
||||
// containsPub checks if a publication is already in the list (by DOI or title)
|
||||
func containsPub(pubs []*database.Publication, pub *database.Publication) bool {
|
||||
for _, existing := range pubs {
|
||||
// Check DOI
|
||||
if pub.DOI != nil && existing.DOI != nil && *pub.DOI == *existing.DOI {
|
||||
return true
|
||||
}
|
||||
// Check title (rough match)
|
||||
if pub.Title == existing.Title {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// findAuthorPosition finds the position of a staff member in the author list
|
||||
func findAuthorPosition(pub *database.Publication, staff *database.UniversityStaff) int {
|
||||
for i, author := range pub.Authors {
|
||||
// Check if author name matches staff
|
||||
if staff.LastName != "" && containsIgnoreCase(author, staff.LastName) {
|
||||
return i + 1
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// containsIgnoreCase checks if s contains substr (case insensitive)
|
||||
func containsIgnoreCase(s, substr string) bool {
|
||||
return len(s) >= len(substr) &&
|
||||
(s == substr ||
|
||||
len(substr) == 0 ||
|
||||
(len(s) > 0 && containsIgnoreCaseHelper(s, substr)))
|
||||
}
|
||||
|
||||
func containsIgnoreCaseHelper(s, substr string) bool {
|
||||
for i := 0; i <= len(s)-len(substr); i++ {
|
||||
if equalFold(s[i:i+len(substr)], substr) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func equalFold(s1, s2 string) bool {
|
||||
if len(s1) != len(s2) {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < len(s1); i++ {
|
||||
c1, c2 := s1[i], s2[i]
|
||||
if c1 != c2 {
|
||||
// Simple ASCII case folding
|
||||
if c1 >= 'A' && c1 <= 'Z' {
|
||||
c1 += 'a' - 'A'
|
||||
}
|
||||
if c2 >= 'A' && c2 <= 'Z' {
|
||||
c2 += 'a' - 'A'
|
||||
}
|
||||
if c1 != c2 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
188
edu-search-service/internal/publications/pub_crawler_test.go
Normal file
188
edu-search-service/internal/publications/pub_crawler_test.go
Normal file
@@ -0,0 +1,188 @@
|
||||
package publications
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/breakpilot/edu-search-service/internal/database"
|
||||
)
|
||||
|
||||
func TestContainsPub_ByDOI(t *testing.T) {
|
||||
doi1 := "10.1000/test1"
|
||||
doi2 := "10.1000/test2"
|
||||
doi3 := "10.1000/test3"
|
||||
|
||||
pubs := []*database.Publication{
|
||||
{Title: "Paper 1", DOI: &doi1},
|
||||
{Title: "Paper 2", DOI: &doi2},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
pub *database.Publication
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "DOI exists in list",
|
||||
pub: &database.Publication{Title: "Different Title", DOI: &doi1},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "DOI does not exist",
|
||||
pub: &database.Publication{Title: "New Paper", DOI: &doi3},
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := containsPub(pubs, tt.pub)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected %v, got %v", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestContainsPub_ByTitle(t *testing.T) {
|
||||
pubs := []*database.Publication{
|
||||
{Title: "Machine Learning Applications"},
|
||||
{Title: "Deep Neural Networks"},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
pub *database.Publication
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "Title exists in list",
|
||||
pub: &database.Publication{Title: "Machine Learning Applications"},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Title does not exist",
|
||||
pub: &database.Publication{Title: "New Research Paper"},
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := containsPub(pubs, tt.pub)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected %v, got %v", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestContainsIgnoreCase(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
s string
|
||||
substr string
|
||||
expected bool
|
||||
}{
|
||||
{"Exact match", "Hello World", "Hello", true},
|
||||
{"Case insensitive", "Hello World", "hello", true},
|
||||
{"Case insensitive uppercase", "HELLO WORLD", "world", true},
|
||||
{"Substring in middle", "The quick brown fox", "brown", true},
|
||||
{"No match", "Hello World", "xyz", false},
|
||||
{"Empty substring", "Hello", "", true},
|
||||
{"Empty string", "", "test", false},
|
||||
{"Both empty", "", "", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := containsIgnoreCase(tt.s, tt.substr)
|
||||
if result != tt.expected {
|
||||
t.Errorf("containsIgnoreCase(%q, %q) = %v, expected %v",
|
||||
tt.s, tt.substr, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEqualFold(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
s1 string
|
||||
s2 string
|
||||
expected bool
|
||||
}{
|
||||
{"Same string", "hello", "hello", true},
|
||||
{"Different case", "Hello", "hello", true},
|
||||
{"All uppercase", "HELLO", "hello", true},
|
||||
{"Mixed case", "HeLLo", "hEllO", true},
|
||||
{"Different strings", "hello", "world", false},
|
||||
{"Different length", "hello", "hi", false},
|
||||
{"Empty strings", "", "", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := equalFold(tt.s1, tt.s2)
|
||||
if result != tt.expected {
|
||||
t.Errorf("equalFold(%q, %q) = %v, expected %v",
|
||||
tt.s1, tt.s2, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindAuthorPosition(t *testing.T) {
|
||||
pub := &database.Publication{
|
||||
Title: "Test Paper",
|
||||
Authors: []string{
|
||||
"John Smith",
|
||||
"Maria Müller",
|
||||
"Hans Weber",
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
staff *database.UniversityStaff
|
||||
expected int
|
||||
}{
|
||||
{
|
||||
name: "First author",
|
||||
staff: &database.UniversityStaff{
|
||||
LastName: "Smith",
|
||||
},
|
||||
expected: 1,
|
||||
},
|
||||
{
|
||||
name: "Second author",
|
||||
staff: &database.UniversityStaff{
|
||||
LastName: "Müller",
|
||||
},
|
||||
expected: 2,
|
||||
},
|
||||
{
|
||||
name: "Third author",
|
||||
staff: &database.UniversityStaff{
|
||||
LastName: "Weber",
|
||||
},
|
||||
expected: 3,
|
||||
},
|
||||
{
|
||||
name: "Author not found",
|
||||
staff: &database.UniversityStaff{
|
||||
LastName: "Unknown",
|
||||
},
|
||||
expected: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := findAuthorPosition(pub, tt.staff)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected position %d, got %d for author %s",
|
||||
tt.expected, result, tt.staff.LastName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user