breakpilot-pwa/ai-compliance-sdk/internal/roadmap/parser.go
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

package roadmap

import (
	"bytes"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"strconv"
	"strings"
	"time"

	"github.com/xuri/excelize/v2"
)

// Parser handles file parsing for roadmap imports.
type Parser struct{}

// NewParser creates a new parser.
func NewParser() *Parser {
	return &Parser{}
}

// ColumnMapping defines the canonical import fields and the header spellings
// (English and German) that map to them. Note that "bereich" appears under
// both "category" and "department"; see detectColumns for how ties behave.
var ColumnMapping = map[string][]string{
	"title":          {"title", "titel", "name", "bezeichnung", "massnahme", "maßnahme", "aufgabe", "task"},
	"description":    {"description", "beschreibung", "details", "inhalt", "content"},
	"category":       {"category", "kategorie", "bereich", "type", "typ"},
	"priority":       {"priority", "priorität", "prioritaet", "prio", "dringlichkeit"},
	"status":         {"status", "stand", "zustand"},
	"control_id":     {"control_id", "control", "kontrolle", "massnahme_id", "ctrl"},
	"regulation_ref": {"regulation", "regulation_ref", "verordnung", "gesetz", "artikel", "article", "gdpr_ref"},
	"gap_id":         {"gap_id", "gap", "luecke", "lücke"},
	"effort_days":    {"effort_days", "effort", "aufwand", "tage", "days", "pt", "personentage"},
	"assignee":       {"assignee", "verantwortlich", "zustaendig", "zuständig", "owner", "responsible"},
	"department":     {"department", "abteilung", "bereich", "team"},
	"planned_start":  {"planned_start", "start", "beginn", "startdatum", "start_date"},
	"planned_end":    {"planned_end", "end", "ende", "enddatum", "end_date", "deadline", "frist"},
	"notes":          {"notes", "notizen", "bemerkungen", "kommentar", "comment", "anmerkungen"},
}
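
// Illustration of how headers resolve (hypothetical inputs; matching is
// case-insensitive and detailed in detectColumns below):
//
//	"Maßnahme"    -> title         (exact variation, confidence 1.0)
//	"Frist"       -> planned_end   (exact variation, confidence 1.0)
//	"Start-Datum" -> planned_start (substring "start", confidence 0.8)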

// DetectedColumn represents a detected column mapping.
type DetectedColumn struct {
	Index      int     `json:"index"`
	Header     string  `json:"header"`
	MappedTo   string  `json:"mapped_to"`
	Confidence float64 `json:"confidence"`
}

// ParseResult contains the result of parsing a file.
type ParseResult struct {
	Format      ImportFormat     `json:"format"`
	TotalRows   int              `json:"total_rows"`
	ValidRows   int              `json:"valid_rows"`
	InvalidRows int              `json:"invalid_rows"`
	Columns     []DetectedColumn `json:"columns"`
	Items       []ParsedItem     `json:"items"`
	Errors      []string         `json:"errors"`
}

// ParseFile detects the file format and parses the file.
func (p *Parser) ParseFile(data []byte, filename string, contentType string) (*ParseResult, error) {
	format := p.detectFormat(filename, contentType)
	switch format {
	case ImportFormatExcel:
		return p.parseExcel(data)
	case ImportFormatCSV:
		return p.parseCSV(data)
	case ImportFormatJSON:
		return p.parseJSON(data)
	default:
		return nil, fmt.Errorf("unsupported file format: %s", filename)
	}
}
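
// Usage sketch (a hypothetical HTTP upload handler; how the bytes and the
// multipart header are obtained is an assumption, not part of this package):
//
//	p := NewParser()
//	result, err := p.ParseFile(fileBytes, header.Filename, header.Header.Get("Content-Type"))
//	if err != nil {
//		// unsupported format or unreadable file
//	}
//	fmt.Printf("%d/%d rows valid\n", result.ValidRows, result.TotalRows)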

// detectFormat detects the file format, first by filename extension, then by
// MIME content type. It returns the empty string if neither matches.
func (p *Parser) detectFormat(filename string, contentType string) ImportFormat {
	filename = strings.ToLower(filename)
	if strings.HasSuffix(filename, ".xlsx") || strings.HasSuffix(filename, ".xls") {
		return ImportFormatExcel
	}
	if strings.HasSuffix(filename, ".csv") {
		return ImportFormatCSV
	}
	if strings.HasSuffix(filename, ".json") {
		return ImportFormatJSON
	}
	// Fall back to the content type
	switch contentType {
	case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
		"application/vnd.ms-excel":
		return ImportFormatExcel
	case "text/csv":
		return ImportFormatCSV
	case "application/json":
		return ImportFormatJSON
	}
	return ""
}

// parseExcel parses an Excel file. Only the first sheet is read.
func (p *Parser) parseExcel(data []byte) (*ParseResult, error) {
	result := &ParseResult{
		Format: ImportFormatExcel,
	}

	f, err := excelize.OpenReader(bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("failed to open Excel file: %w", err)
	}
	defer f.Close()

	// Get the first sheet
	sheets := f.GetSheetList()
	if len(sheets) == 0 {
		return nil, fmt.Errorf("no sheets found in Excel file")
	}
	rows, err := f.GetRows(sheets[0])
	if err != nil {
		return nil, fmt.Errorf("failed to read rows: %w", err)
	}
	if len(rows) < 2 {
		return nil, fmt.Errorf("file must have at least a header row and one data row")
	}

	// Detect column mappings from the header row
	headers := rows[0]
	result.Columns = p.detectColumns(headers)

	// Parse data rows
	for i, row := range rows[1:] {
		rowNum := i + 2 // 1-based, skipping the header row
		item := p.parseRow(row, result.Columns, rowNum)
		result.Items = append(result.Items, item)
		result.TotalRows++
		if item.IsValid {
			result.ValidRows++
		} else {
			result.InvalidRows++
		}
	}
	return result, nil
}

// parseCSV parses a CSV file, sniffing the delimiter by trying ',', ';', and
// '\t' in turn and keeping the first that yields more than one column.
func (p *Parser) parseCSV(data []byte) (*ParseResult, error) {
	result := &ParseResult{
		Format: ImportFormatCSV,
	}

	// Try different delimiters
	delimiters := []rune{',', ';', '\t'}
	var records [][]string
	var err error
	for _, delim := range delimiters {
		reader := csv.NewReader(bytes.NewReader(data))
		reader.Comma = delim
		reader.LazyQuotes = true
		reader.TrimLeadingSpace = true
		records, err = reader.ReadAll()
		if err == nil && len(records) > 0 && len(records[0]) > 1 {
			break
		}
	}
	if err != nil {
		return nil, fmt.Errorf("failed to parse CSV: %w", err)
	}
	if len(records) < 2 {
		return nil, fmt.Errorf("file must have at least a header row and one data row")
	}

	// Detect column mappings from the header row
	headers := records[0]
	result.Columns = p.detectColumns(headers)

	// Parse data rows
	for i, row := range records[1:] {
		rowNum := i + 2
		item := p.parseRow(row, result.Columns, rowNum)
		result.Items = append(result.Items, item)
		result.TotalRows++
		if item.IsValid {
			result.ValidRows++
		} else {
			result.InvalidRows++
		}
	}
	return result, nil
}
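
// The delimiter sniffing above handles, for example, a German-style
// semicolon-separated export (hypothetical content):
//
//	Titel;Priorität;Frist
//	DSFA durchführen;hoch;31.03.2026
//
// The comma pass yields a single column per row, so the loop moves on to ';',
// which yields three columns, and parsing proceeds with that delimiter.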

// parseJSON parses a JSON file.
func (p *Parser) parseJSON(data []byte) (*ParseResult, error) {
	result := &ParseResult{
		Format: ImportFormatJSON,
	}

	// Try parsing as a top-level array of items
	var items []map[string]interface{}
	if err := json.Unmarshal(data, &items); err != nil {
		// Fall back to an object wrapping an "items" array
		var wrapper struct {
			Items []map[string]interface{} `json:"items"`
		}
		if err := json.Unmarshal(data, &wrapper); err != nil {
			return nil, fmt.Errorf("failed to parse JSON: %w", err)
		}
		items = wrapper.Items
	}
	if len(items) == 0 {
		return nil, fmt.Errorf("no items found in JSON file")
	}

	// Detect columns from the keys of the first item
	headers := make([]string, 0, len(items[0]))
	for key := range items[0] {
		headers = append(headers, key)
	}
	result.Columns = p.detectColumns(headers)

	// Parse items by converting each map into a row slice
	for i, itemMap := range items {
		rowNum := i + 1 // no header row in JSON input
		row := make([]string, len(result.Columns))
		for j, col := range result.Columns {
			if val, ok := itemMap[col.Header]; ok {
				row[j] = fmt.Sprintf("%v", val)
			}
		}
		item := p.parseRow(row, result.Columns, rowNum)
		result.Items = append(result.Items, item)
		result.TotalRows++
		if item.IsValid {
			result.ValidRows++
		} else {
			result.InvalidRows++
		}
	}
	return result, nil
}
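
// Both of these input shapes are accepted (hypothetical payloads):
//
//	[{"title": "DSFA durchführen", "priority": "hoch"}]
//	{"items": [{"title": "DSFA durchführen", "priority": "hoch"}]}
//
// Columns are detected from the keys of the first item only, so keys that
// appear only in later items are ignored.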

// detectColumns detects column mappings from headers. An exact match against
// a known variation scores confidence 1.0; a substring match scores 0.8.
func (p *Parser) detectColumns(headers []string) []DetectedColumn {
	columns := make([]DetectedColumn, len(headers))
	for i, header := range headers {
		columns[i] = DetectedColumn{
			Index:      i,
			Header:     header,
			Confidence: 0,
		}
		headerLower := strings.ToLower(strings.TrimSpace(header))
		// Try to match against known column names. Because ColumnMapping is
		// iterated in map order, ties between fields that share a variation
		// (e.g. "bereich") are not resolved deterministically.
		for fieldName, variations := range ColumnMapping {
			for _, variation := range variations {
				if headerLower == variation || strings.Contains(headerLower, variation) {
					if headerLower == variation {
						columns[i].MappedTo = fieldName
						columns[i].Confidence = 1.0
					} else if columns[i].Confidence < 0.8 {
						columns[i].MappedTo = fieldName
						columns[i].Confidence = 0.8
					}
					break
				}
			}
			// An exact match is final; otherwise keep scanning other fields.
			if columns[i].Confidence >= 1.0 {
				break
			}
		}
	}
	return columns
}
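
// Illustration (hypothetical headers):
//
//	"Priorität"      -> priority, confidence 1.0 (exact variation)
//	"Prio (1-4)"     -> priority, confidence 0.8 (substring "prio")
//	"Internal Notes" -> notes,    confidence 0.8 (substring "notes")
//	"Foo"            -> unmapped, confidence 0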

// parseRow parses a single row into a ParsedItem.
func (p *Parser) parseRow(row []string, columns []DetectedColumn, rowNum int) ParsedItem {
	item := ParsedItem{
		RowNumber: rowNum,
		IsValid:   true,
		Data:      RoadmapItemInput{},
	}

	// Build a map from mapped field name to trimmed cell value
	values := make(map[string]string)
	for i, col := range columns {
		if i < len(row) && col.MappedTo != "" {
			values[col.MappedTo] = strings.TrimSpace(row[i])
		}
	}

	// Title (required)
	if title, ok := values["title"]; ok && title != "" {
		item.Data.Title = title
	} else {
		item.IsValid = false
		item.Errors = append(item.Errors, "Titel/Title ist erforderlich")
	}

	// Optional free-text fields
	if desc, ok := values["description"]; ok {
		item.Data.Description = desc
	}

	// Category: unknown values fall back to "technical" with a warning
	if cat, ok := values["category"]; ok && cat != "" {
		item.Data.Category = p.parseCategory(cat)
		if item.Data.Category == "" {
			item.Warnings = append(item.Warnings, fmt.Sprintf("Unbekannte Kategorie: %s", cat))
			item.Data.Category = ItemCategoryTechnical
		}
	}

	// Priority: unknown values fall back to "medium" with a warning
	if prio, ok := values["priority"]; ok && prio != "" {
		item.Data.Priority = p.parsePriority(prio)
		if item.Data.Priority == "" {
			item.Warnings = append(item.Warnings, fmt.Sprintf("Unbekannte Priorität: %s", prio))
			item.Data.Priority = ItemPriorityMedium
		}
	}

	// Status: unknown values fall back to "planned" with a warning
	if status, ok := values["status"]; ok && status != "" {
		item.Data.Status = p.parseStatus(status)
		if item.Data.Status == "" {
			item.Warnings = append(item.Warnings, fmt.Sprintf("Unbekannter Status: %s", status))
			item.Data.Status = ItemStatusPlanned
		}
	}

	// Cross-references
	if ctrl, ok := values["control_id"]; ok {
		item.Data.ControlID = ctrl
	}
	if reg, ok := values["regulation_ref"]; ok {
		item.Data.RegulationRef = reg
	}
	if gap, ok := values["gap_id"]; ok {
		item.Data.GapID = gap
	}

	// Effort in person-days; non-numeric values are silently dropped
	if effort, ok := values["effort_days"]; ok && effort != "" {
		if days, err := strconv.Atoi(effort); err == nil {
			item.Data.EffortDays = &days
		}
	}

	// Ownership
	if assignee, ok := values["assignee"]; ok {
		item.Data.AssigneeName = assignee
	}
	if dept, ok := values["department"]; ok {
		item.Data.Department = dept
	}

	// Dates; unparseable values are silently dropped
	if startStr, ok := values["planned_start"]; ok && startStr != "" {
		if start := p.parseDate(startStr); start != nil {
			item.Data.PlannedStart = start
		}
	}
	if endStr, ok := values["planned_end"]; ok && endStr != "" {
		if end := p.parseDate(endStr); end != nil {
			item.Data.PlannedEnd = end
		}
	}

	// Notes
	if notes, ok := values["notes"]; ok {
		item.Data.Notes = notes
	}
	return item
}
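
// Worked example: given mapped columns [title, priority, status, effort_days,
// planned_start] and the hypothetical row
//
//	["DSFA durchführen", "hoch", "in Arbeit", "10", "01.04.2026"]
//
// parseRow yields Title="DSFA durchführen", Priority=ItemPriorityHigh,
// Status=ItemStatusInProgress, EffortDays=10, PlannedStart=2026-04-01,
// and IsValid=true.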

// parseCategory converts a string to ItemCategory.
func (p *Parser) parseCategory(s string) ItemCategory {
	s = strings.ToLower(strings.TrimSpace(s))
	switch {
	case strings.Contains(s, "tech"):
		return ItemCategoryTechnical
	case strings.Contains(s, "org"):
		return ItemCategoryOrganizational
	case strings.Contains(s, "proz") || strings.Contains(s, "process"):
		return ItemCategoryProcessual
	case strings.Contains(s, "dok") || strings.Contains(s, "doc"):
		return ItemCategoryDocumentation
	case strings.Contains(s, "train") || strings.Contains(s, "schul"):
		return ItemCategoryTraining
	default:
		return ""
	}
}

// parsePriority converts a string to ItemPriority.
func (p *Parser) parsePriority(s string) ItemPriority {
	s = strings.ToLower(strings.TrimSpace(s))
	switch {
	case strings.Contains(s, "crit") || strings.Contains(s, "krit") || s == "1":
		return ItemPriorityCritical
	case strings.Contains(s, "high") || strings.Contains(s, "hoch") || s == "2":
		return ItemPriorityHigh
	case strings.Contains(s, "med") || strings.Contains(s, "mitt") || s == "3":
		return ItemPriorityMedium
	case strings.Contains(s, "low") || strings.Contains(s, "nied") || s == "4":
		return ItemPriorityLow
	default:
		return ""
	}
}

// parseStatus converts a string to ItemStatus.
func (p *Parser) parseStatus(s string) ItemStatus {
	s = strings.ToLower(strings.TrimSpace(s))
	switch {
	case strings.Contains(s, "plan") || strings.Contains(s, "offen") || strings.Contains(s, "open"):
		return ItemStatusPlanned
	case strings.Contains(s, "progress") || strings.Contains(s, "lauf") || strings.Contains(s, "arbeit"):
		return ItemStatusInProgress
	case strings.Contains(s, "block") || strings.Contains(s, "wart"):
		return ItemStatusBlocked
	case strings.Contains(s, "complet") || strings.Contains(s, "done") || strings.Contains(s, "fertig") || strings.Contains(s, "erledigt"):
		return ItemStatusCompleted
	case strings.Contains(s, "defer") || strings.Contains(s, "zurück") || strings.Contains(s, "verschob"):
		return ItemStatusDeferred
	default:
		return ""
	}
}
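
// The three normalizers above accept English, German, and (for priority)
// numeric spellings. Illustration (hypothetical inputs):
//
//	parsePriority("Kritisch") == ItemPriorityCritical // matches "krit"
//	parsePriority("2")        == ItemPriorityHigh
//	parseStatus("erledigt")   == ItemStatusCompleted
//	parseCategory("Schulung") == ItemCategoryTraining // matches "schul"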

// parseDate attempts to parse various date formats. Note that the format list
// tries DD/MM/YYYY before MM/DD/YYYY, so ambiguous slash dates such as
// "03/04/2026" resolve day-first.
func (p *Parser) parseDate(s string) *time.Time {
	s = strings.TrimSpace(s)
	if s == "" {
		return nil
	}
	formats := []string{
		"2006-01-02", // ISO 8601
		"02.01.2006", // German DD.MM.YYYY
		"2.1.2006",
		"02/01/2006", // DD/MM/YYYY
		"2/1/2006",
		"01/02/2006", // MM/DD/YYYY (only reached for dates invalid as DD/MM)
		"1/2/2006",
		"2006/01/02",
		time.RFC3339,
	}
	for _, format := range formats {
		if t, err := time.Parse(format, s); err == nil {
			return &t
		}
	}
	return nil
}
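
// Illustration (hypothetical inputs):
//
//	parseDate("2026-03-31") // 2026-03-31 (ISO 8601)
//	parseDate("31.03.2026") // 2026-03-31 (German DD.MM.YYYY)
//	parseDate("04/13/2026") // 2026-04-13 (invalid as DD/MM, matches MM/DD)
//	parseDate("kein Datum") // nil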

// ValidateAndEnrich validates parsed items and enriches them with mappings
// against known controls, regulations, and gaps (matched case-insensitively).
func (p *Parser) ValidateAndEnrich(items []ParsedItem, controls []string, regulations []string, gaps []string) []ParsedItem {
	// Build case-insensitive lookup sets
	controlSet := make(map[string]bool)
	for _, c := range controls {
		controlSet[strings.ToLower(c)] = true
	}
	regSet := make(map[string]bool)
	for _, r := range regulations {
		regSet[strings.ToLower(r)] = true
	}
	gapSet := make(map[string]bool)
	for _, g := range gaps {
		gapSet[strings.ToLower(g)] = true
	}

	for i := range items {
		item := &items[i]

		// Validate control ID; unmatched controls produce a warning
		if item.Data.ControlID != "" {
			if controlSet[strings.ToLower(item.Data.ControlID)] {
				item.MatchedControl = item.Data.ControlID
				item.MatchConfidence = 1.0
			} else {
				item.Warnings = append(item.Warnings, fmt.Sprintf("Control '%s' nicht im Katalog gefunden", item.Data.ControlID))
			}
		}

		// Validate regulation reference; unmatched references pass silently
		if item.Data.RegulationRef != "" {
			if regSet[strings.ToLower(item.Data.RegulationRef)] {
				item.MatchedRegulation = item.Data.RegulationRef
			}
		}

		// Validate gap ID; unmatched gaps produce a warning
		if item.Data.GapID != "" {
			if gapSet[strings.ToLower(item.Data.GapID)] {
				item.MatchedGap = item.Data.GapID
			} else {
				item.Warnings = append(item.Warnings, fmt.Sprintf("Gap '%s' nicht im Mapping gefunden", item.Data.GapID))
			}
		}
	}
	return items
}
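
// End-to-end sketch: parse an upload, then cross-check it against known IDs.
// This is an illustrative, hypothetical helper, not part of the import API;
// in practice the catalog slices come from the calling service.
func exampleImport(fileBytes []byte) {
	p := NewParser()
	result, err := p.ParseFile(fileBytes, "roadmap.csv", "text/csv")
	if err != nil {
		fmt.Println("parse failed:", err)
		return
	}

	// Hypothetical catalogs; real ones are loaded from the compliance store.
	controls := []string{"A.5.1", "A.8.2"}
	regulations := []string{"Art. 32 DSGVO"}
	gaps := []string{"GAP-001"}
	result.Items = p.ValidateAndEnrich(result.Items, controls, regulations, gaps)

	fmt.Printf("%d valid, %d invalid of %d rows\n", result.ValidRows, result.InvalidRows, result.TotalRows)
}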