Files
breakpilot-compliance/ai-compliance-sdk/internal/roadmap/parser.go
Sharang Parnerkar 13f57c4519 refactor(go): split obligations, portfolio, rbac, whistleblower handlers and stores, roadmap parser
Split 7 files exceeding the 500 LOC hard cap into 16 files, all under 500 LOC.
No exported symbols renamed; zero behavior changes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-19 10:00:15 +02:00

280 lines
7.3 KiB
Go

package roadmap
import (
	"bytes"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"sort"
	"strings"

	"github.com/xuri/excelize/v2"
)
// Parser handles file parsing for roadmap imports. It is stateless, so a
// single instance may be shared freely.
type Parser struct{}

// NewParser constructs a ready-to-use Parser.
func NewParser() *Parser {
	p := new(Parser)
	return p
}
// ColumnMapping defines expected column names and their variations.
// Keys are the canonical roadmap field names; each value lists lowercase
// header spellings (English and German) that detectColumns accepts, matched
// either exactly (confidence 1.0) or as a substring of the incoming header
// (confidence 0.8). All variations must be lowercase, since headers are
// lowercased before comparison.
var ColumnMapping = map[string][]string{
	"title":          {"title", "titel", "name", "bezeichnung", "massnahme", "maßnahme", "aufgabe", "task"},
	"description":    {"description", "beschreibung", "details", "inhalt", "content"},
	"category":       {"category", "kategorie", "bereich", "type", "typ"},
	"priority":       {"priority", "priorität", "prioritaet", "prio", "dringlichkeit"},
	"status":         {"status", "stand", "zustand"},
	"control_id":     {"control_id", "control", "kontrolle", "massnahme_id", "ctrl"},
	"regulation_ref": {"regulation", "regulation_ref", "verordnung", "gesetz", "artikel", "article", "gdpr_ref"},
	"gap_id":         {"gap_id", "gap", "luecke", "lücke"},
	"effort_days":    {"effort_days", "effort", "aufwand", "tage", "days", "pt", "personentage"},
	"assignee":       {"assignee", "verantwortlich", "zustaendig", "zuständig", "owner", "responsible"},
	"department":     {"department", "abteilung", "bereich", "team"},
	"planned_start":  {"planned_start", "start", "beginn", "startdatum", "start_date"},
	"planned_end":    {"planned_end", "end", "ende", "enddatum", "end_date", "deadline", "frist"},
	"notes":          {"notes", "notizen", "bemerkungen", "kommentar", "comment", "anmerkungen"},
}
// DetectedColumn represents a detected column mapping produced by
// detectColumns for a single header cell of the imported file.
type DetectedColumn struct {
	Index      int     `json:"index"`      // zero-based column position in the source file
	Header     string  `json:"header"`     // raw header text as it appeared in the file
	MappedTo   string  `json:"mapped_to"`  // canonical field name from ColumnMapping; "" if unmatched
	Confidence float64 `json:"confidence"` // 1.0 exact match, 0.8 substring match, 0 unmatched
}
// ParseResult contains the result of parsing a file: the detected format,
// per-row validity counts, the column mapping, and every parsed item
// (invalid rows are included in Items with IsValid unset).
type ParseResult struct {
	Format      ImportFormat     `json:"format"`       // which parser produced this result
	TotalRows   int              `json:"total_rows"`   // data rows processed (header excluded)
	ValidRows   int              `json:"valid_rows"`   // rows where item.IsValid is true
	InvalidRows int              `json:"invalid_rows"` // TotalRows - ValidRows
	Columns     []DetectedColumn `json:"columns"`      // header-to-field mapping used for all rows
	Items       []ParsedItem     `json:"items"`        // one entry per data row, valid or not
	Errors      []string         `json:"errors"`       // file-level (non-row) error messages
}
// ParseFile determines the file's format from its name and content type,
// then dispatches to the matching format-specific parser. It returns an
// error for formats it does not recognize.
func (p *Parser) ParseFile(data []byte, filename string, contentType string) (*ParseResult, error) {
	switch p.detectFormat(filename, contentType) {
	case ImportFormatExcel:
		return p.parseExcel(data)
	case ImportFormatCSV:
		return p.parseCSV(data)
	case ImportFormatJSON:
		return p.parseJSON(data)
	}
	return nil, fmt.Errorf("unsupported file format: %s", filename)
}
// detectFormat infers the import format, preferring the (case-insensitive)
// file extension and falling back to the MIME content type. It returns the
// empty string when neither yields a known format.
func (p *Parser) detectFormat(filename string, contentType string) ImportFormat {
	switch lower := strings.ToLower(filename); {
	case strings.HasSuffix(lower, ".xlsx"), strings.HasSuffix(lower, ".xls"):
		return ImportFormatExcel
	case strings.HasSuffix(lower, ".csv"):
		return ImportFormatCSV
	case strings.HasSuffix(lower, ".json"):
		return ImportFormatJSON
	}
	switch contentType {
	case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
		"application/vnd.ms-excel":
		return ImportFormatExcel
	case "text/csv":
		return ImportFormatCSV
	case "application/json":
		return ImportFormatJSON
	}
	return ""
}
// parseExcel parses the first sheet of an Excel workbook. Row 1 is treated
// as the header row; every subsequent row becomes one ParsedItem.
func (p *Parser) parseExcel(data []byte) (*ParseResult, error) {
	f, err := excelize.OpenReader(bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("failed to open Excel file: %w", err)
	}
	defer f.Close()

	sheets := f.GetSheetList()
	if len(sheets) == 0 {
		return nil, fmt.Errorf("no sheets found in Excel file")
	}
	// Only the first sheet is imported; additional sheets are ignored.
	rows, err := f.GetRows(sheets[0])
	if err != nil {
		return nil, fmt.Errorf("failed to read rows: %w", err)
	}
	if len(rows) < 2 {
		return nil, fmt.Errorf("file must have at least a header row and one data row")
	}

	result := &ParseResult{Format: ImportFormatExcel}
	result.Columns = p.detectColumns(rows[0])
	for idx, row := range rows[1:] {
		// Spreadsheet row numbers are 1-based and row 1 is the header,
		// so the first data row is row 2.
		item := p.parseRow(row, result.Columns, idx+2)
		result.Items = append(result.Items, item)
		result.TotalRows++
		if item.IsValid {
			result.ValidRows++
		} else {
			result.InvalidRows++
		}
	}
	return result, nil
}
// parseCSV parses CSV data, auto-detecting the delimiter by trying comma,
// semicolon, and tab in turn. The first attempt that parses cleanly into
// more than one column wins; otherwise the final attempt's records (or
// error) are used. Row 1 is the header row.
func (p *Parser) parseCSV(data []byte) (*ParseResult, error) {
	result := &ParseResult{
		Format: ImportFormatCSV,
	}

	var records [][]string
	var err error
	for _, delim := range []rune{',', ';', '\t'} {
		reader := csv.NewReader(bytes.NewReader(data))
		reader.Comma = delim
		reader.LazyQuotes = true
		// Trim leading whitespace in fields. The previous version configured
		// this on an initial reader that the retry loop immediately replaced,
		// so the flag was silently dropped; set it on every attempt instead.
		reader.TrimLeadingSpace = true
		records, err = reader.ReadAll()
		if err == nil && len(records) > 0 && len(records[0]) > 1 {
			break
		}
	}
	if err != nil {
		return nil, fmt.Errorf("failed to parse CSV: %w", err)
	}
	if len(records) < 2 {
		return nil, fmt.Errorf("file must have at least a header row and one data row")
	}

	result.Columns = p.detectColumns(records[0])
	for i, row := range records[1:] {
		rowNum := i + 2 // 1-based file line; line 1 is the header
		item := p.parseRow(row, result.Columns, rowNum)
		result.Items = append(result.Items, item)
		result.TotalRows++
		if item.IsValid {
			result.ValidRows++
		} else {
			result.InvalidRows++
		}
	}
	return result, nil
}
// parseJSON parses a JSON payload that is either a top-level array of
// objects or a wrapper object of the form {"items": [...]}. Each object
// becomes one row; values are stringified with fmt's default formatting.
func (p *Parser) parseJSON(data []byte) (*ParseResult, error) {
	result := &ParseResult{
		Format: ImportFormatJSON,
	}

	var items []map[string]interface{}
	if err := json.Unmarshal(data, &items); err != nil {
		// Not a bare array; fall back to the {"items": [...]} wrapper form.
		var wrapper struct {
			Items []map[string]interface{} `json:"items"`
		}
		if err := json.Unmarshal(data, &wrapper); err != nil {
			return nil, fmt.Errorf("failed to parse JSON: %w", err)
		}
		items = wrapper.Items
	}
	if len(items) == 0 {
		return nil, fmt.Errorf("no items found in JSON file")
	}

	// Derive the header set from the first item's keys and sort them:
	// Go map iteration order is random, which previously made the column
	// order (and thus any tie-breaking during detection) vary between runs.
	// NOTE(review): keys that appear only in later items are ignored —
	// confirm that is the intended behavior.
	headers := make([]string, 0, len(items[0]))
	for key := range items[0] {
		headers = append(headers, key)
	}
	sort.Strings(headers)
	result.Columns = p.detectColumns(headers)

	for i, itemMap := range items {
		rowNum := i + 1 // 1-based item index; JSON input has no header row
		row := make([]string, len(result.Columns))
		for j, col := range result.Columns {
			if val, ok := itemMap[col.Header]; ok {
				row[j] = fmt.Sprintf("%v", val)
			}
		}
		item := p.parseRow(row, result.Columns, rowNum)
		result.Items = append(result.Items, item)
		result.TotalRows++
		if item.IsValid {
			result.ValidRows++
		} else {
			result.InvalidRows++
		}
	}
	return result, nil
}
// detectColumns maps each raw header to a canonical roadmap field using the
// ColumnMapping variation table. An exact (case-insensitive, trimmed) match
// scores confidence 1.0; a substring match scores 0.8. Headers matching
// nothing come back with MappedTo == "" and Confidence 0.
func (p *Parser) detectColumns(headers []string) []DetectedColumn {
	// Iterate fields in sorted order: ranging over the map directly was
	// non-deterministic whenever a header matched variations of more than
	// one field at equal confidence (e.g. "bereich" appears under both
	// "category" and "department"), so repeated imports of the same file
	// could map columns differently.
	fields := make([]string, 0, len(ColumnMapping))
	for fieldName := range ColumnMapping {
		fields = append(fields, fieldName)
	}
	sort.Strings(fields)

	columns := make([]DetectedColumn, len(headers))
	for i, header := range headers {
		columns[i] = DetectedColumn{
			Index:      i,
			Header:     header,
			Confidence: 0,
		}
		headerLower := strings.ToLower(strings.TrimSpace(header))
		for _, fieldName := range fields {
			for _, variation := range ColumnMapping[fieldName] {
				if headerLower == variation {
					columns[i].MappedTo = fieldName
					columns[i].Confidence = 1.0
					break
				}
				// Substring match. Unlike the previous version, do not stop
				// scanning this field's variations here: a later variation may
				// still match exactly (e.g. header "start_date" contains
				// "start" but equals "start_date") and should win with 1.0.
				if strings.Contains(headerLower, variation) && columns[i].Confidence < 0.8 {
					columns[i].MappedTo = fieldName
					columns[i].Confidence = 0.8
				}
			}
			if columns[i].Confidence >= 1.0 {
				break
			}
		}
	}
	return columns
}