Files
breakpilot-compliance/ai-compliance-sdk/internal/roadmap/parser.go
Sharang Parnerkar 13f57c4519 refactor(go): split obligations, portfolio, rbac, whistleblower handlers and stores, roadmap parser
Split 7 files exceeding the 500 LOC hard cap into 16 files, all under 500 LOC.
No exported symbols renamed; zero behavior changes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-19 10:00:15 +02:00

280 lines
7.3 KiB
Go

package roadmap
import (
	"bytes"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"sort"
	"strings"

	"github.com/xuri/excelize/v2"
)
// Parser handles file parsing for roadmap imports. It is stateless, so a
// single instance may be shared freely.
type Parser struct{}

// NewParser constructs a ready-to-use Parser.
func NewParser() *Parser {
	p := new(Parser)
	return p
}
// ColumnMapping defines expected column names and their variations.
// Keys are the canonical roadmap field names; each value lists lowercase
// header spellings (English and German) that detectColumns accepts, matched
// either exactly (confidence 1.0) or as a substring of the incoming header
// (confidence 0.8). All variations must be lowercase, since headers are
// lowercased before comparison.
var ColumnMapping = map[string][]string{
	"title":          {"title", "titel", "name", "bezeichnung", "massnahme", "maßnahme", "aufgabe", "task"},
	"description":    {"description", "beschreibung", "details", "inhalt", "content"},
	"category":       {"category", "kategorie", "bereich", "type", "typ"},
	"priority":       {"priority", "priorität", "prioritaet", "prio", "dringlichkeit"},
	"status":         {"status", "stand", "zustand"},
	"control_id":     {"control_id", "control", "kontrolle", "massnahme_id", "ctrl"},
	"regulation_ref": {"regulation", "regulation_ref", "verordnung", "gesetz", "artikel", "article", "gdpr_ref"},
	"gap_id":         {"gap_id", "gap", "luecke", "lücke"},
	"effort_days":    {"effort_days", "effort", "aufwand", "tage", "days", "pt", "personentage"},
	"assignee":       {"assignee", "verantwortlich", "zustaendig", "zuständig", "owner", "responsible"},
	"department":     {"department", "abteilung", "bereich", "team"},
	"planned_start":  {"planned_start", "start", "beginn", "startdatum", "start_date"},
	"planned_end":    {"planned_end", "end", "ende", "enddatum", "end_date", "deadline", "frist"},
	"notes":          {"notes", "notizen", "bemerkungen", "kommentar", "comment", "anmerkungen"},
}
// DetectedColumn represents a detected column mapping produced by
// detectColumns for a single header cell of the imported file.
type DetectedColumn struct {
	Index      int     `json:"index"`      // zero-based column position in the source file
	Header     string  `json:"header"`     // raw header text as it appeared in the file
	MappedTo   string  `json:"mapped_to"`  // canonical field name from ColumnMapping; "" if unmatched
	Confidence float64 `json:"confidence"` // 1.0 exact match, 0.8 substring match, 0 unmatched
}
// ParseResult contains the result of parsing a file: the detected format,
// per-row validity counts, the column mapping, and every parsed item
// (invalid rows are included in Items with IsValid unset).
type ParseResult struct {
	Format      ImportFormat     `json:"format"`       // which parser produced this result
	TotalRows   int              `json:"total_rows"`   // data rows processed (header excluded)
	ValidRows   int              `json:"valid_rows"`   // rows where item.IsValid is true
	InvalidRows int              `json:"invalid_rows"` // TotalRows - ValidRows
	Columns     []DetectedColumn `json:"columns"`      // header-to-field mapping used for all rows
	Items       []ParsedItem     `json:"items"`        // one entry per data row, valid or not
	Errors      []string         `json:"errors"`       // file-level (non-row) error messages
}
// ParseFile determines the file's format from its name and content type,
// then dispatches to the matching format-specific parser. It returns an
// error for formats it does not recognize.
func (p *Parser) ParseFile(data []byte, filename string, contentType string) (*ParseResult, error) {
	switch p.detectFormat(filename, contentType) {
	case ImportFormatExcel:
		return p.parseExcel(data)
	case ImportFormatCSV:
		return p.parseCSV(data)
	case ImportFormatJSON:
		return p.parseJSON(data)
	}
	return nil, fmt.Errorf("unsupported file format: %s", filename)
}
// detectFormat infers the import format, preferring the (case-insensitive)
// file extension and falling back to the MIME content type. It returns the
// empty string when neither yields a known format.
func (p *Parser) detectFormat(filename string, contentType string) ImportFormat {
	switch lower := strings.ToLower(filename); {
	case strings.HasSuffix(lower, ".xlsx"), strings.HasSuffix(lower, ".xls"):
		return ImportFormatExcel
	case strings.HasSuffix(lower, ".csv"):
		return ImportFormatCSV
	case strings.HasSuffix(lower, ".json"):
		return ImportFormatJSON
	}
	switch contentType {
	case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
		"application/vnd.ms-excel":
		return ImportFormatExcel
	case "text/csv":
		return ImportFormatCSV
	case "application/json":
		return ImportFormatJSON
	}
	return ""
}
// parseExcel parses the first sheet of an Excel workbook. Row 1 is treated
// as the header row; every subsequent row becomes one ParsedItem.
func (p *Parser) parseExcel(data []byte) (*ParseResult, error) {
	f, err := excelize.OpenReader(bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("failed to open Excel file: %w", err)
	}
	defer f.Close()

	sheets := f.GetSheetList()
	if len(sheets) == 0 {
		return nil, fmt.Errorf("no sheets found in Excel file")
	}
	// Only the first sheet is imported; additional sheets are ignored.
	rows, err := f.GetRows(sheets[0])
	if err != nil {
		return nil, fmt.Errorf("failed to read rows: %w", err)
	}
	if len(rows) < 2 {
		return nil, fmt.Errorf("file must have at least a header row and one data row")
	}

	result := &ParseResult{Format: ImportFormatExcel}
	result.Columns = p.detectColumns(rows[0])
	for idx, row := range rows[1:] {
		// Spreadsheet row numbers are 1-based and row 1 is the header,
		// so the first data row is row 2.
		item := p.parseRow(row, result.Columns, idx+2)
		result.Items = append(result.Items, item)
		result.TotalRows++
		if item.IsValid {
			result.ValidRows++
		} else {
			result.InvalidRows++
		}
	}
	return result, nil
}
// parseCSV parses CSV data, auto-detecting the delimiter by trying comma,
// semicolon, and tab in turn. The first attempt that parses cleanly into
// more than one column wins; otherwise the final attempt's records (or
// error) are used. Row 1 is the header row.
func (p *Parser) parseCSV(data []byte) (*ParseResult, error) {
	result := &ParseResult{
		Format: ImportFormatCSV,
	}

	var records [][]string
	var err error
	for _, delim := range []rune{',', ';', '\t'} {
		reader := csv.NewReader(bytes.NewReader(data))
		reader.Comma = delim
		reader.LazyQuotes = true
		// Trim leading whitespace in fields. The previous version configured
		// this on an initial reader that the retry loop immediately replaced,
		// so the flag was silently dropped; set it on every attempt instead.
		reader.TrimLeadingSpace = true
		records, err = reader.ReadAll()
		if err == nil && len(records) > 0 && len(records[0]) > 1 {
			break
		}
	}
	if err != nil {
		return nil, fmt.Errorf("failed to parse CSV: %w", err)
	}
	if len(records) < 2 {
		return nil, fmt.Errorf("file must have at least a header row and one data row")
	}

	result.Columns = p.detectColumns(records[0])
	for i, row := range records[1:] {
		rowNum := i + 2 // 1-based file line; line 1 is the header
		item := p.parseRow(row, result.Columns, rowNum)
		result.Items = append(result.Items, item)
		result.TotalRows++
		if item.IsValid {
			result.ValidRows++
		} else {
			result.InvalidRows++
		}
	}
	return result, nil
}
// parseJSON parses a JSON payload that is either a top-level array of
// objects or a wrapper object of the form {"items": [...]}. Each object
// becomes one row; values are stringified with fmt's default formatting.
func (p *Parser) parseJSON(data []byte) (*ParseResult, error) {
	result := &ParseResult{
		Format: ImportFormatJSON,
	}

	var items []map[string]interface{}
	if err := json.Unmarshal(data, &items); err != nil {
		// Not a bare array; fall back to the {"items": [...]} wrapper form.
		var wrapper struct {
			Items []map[string]interface{} `json:"items"`
		}
		if err := json.Unmarshal(data, &wrapper); err != nil {
			return nil, fmt.Errorf("failed to parse JSON: %w", err)
		}
		items = wrapper.Items
	}
	if len(items) == 0 {
		return nil, fmt.Errorf("no items found in JSON file")
	}

	// Derive the header set from the first item's keys and sort them:
	// Go map iteration order is random, which previously made the column
	// order (and thus any tie-breaking during detection) vary between runs.
	// NOTE(review): keys that appear only in later items are ignored —
	// confirm that is the intended behavior.
	headers := make([]string, 0, len(items[0]))
	for key := range items[0] {
		headers = append(headers, key)
	}
	sort.Strings(headers)
	result.Columns = p.detectColumns(headers)

	for i, itemMap := range items {
		rowNum := i + 1 // 1-based item index; JSON input has no header row
		row := make([]string, len(result.Columns))
		for j, col := range result.Columns {
			if val, ok := itemMap[col.Header]; ok {
				row[j] = fmt.Sprintf("%v", val)
			}
		}
		item := p.parseRow(row, result.Columns, rowNum)
		result.Items = append(result.Items, item)
		result.TotalRows++
		if item.IsValid {
			result.ValidRows++
		} else {
			result.InvalidRows++
		}
	}
	return result, nil
}
// detectColumns maps each raw header to a canonical roadmap field using the
// ColumnMapping variation table. An exact (case-insensitive, trimmed) match
// scores confidence 1.0; a substring match scores 0.8. Headers matching
// nothing come back with MappedTo == "" and Confidence 0.
func (p *Parser) detectColumns(headers []string) []DetectedColumn {
	// Iterate fields in sorted order: ranging over the map directly was
	// non-deterministic whenever a header matched variations of more than
	// one field at equal confidence (e.g. "bereich" appears under both
	// "category" and "department"), so repeated imports of the same file
	// could map columns differently.
	fields := make([]string, 0, len(ColumnMapping))
	for fieldName := range ColumnMapping {
		fields = append(fields, fieldName)
	}
	sort.Strings(fields)

	columns := make([]DetectedColumn, len(headers))
	for i, header := range headers {
		columns[i] = DetectedColumn{
			Index:      i,
			Header:     header,
			Confidence: 0,
		}
		headerLower := strings.ToLower(strings.TrimSpace(header))
		for _, fieldName := range fields {
			for _, variation := range ColumnMapping[fieldName] {
				if headerLower == variation {
					columns[i].MappedTo = fieldName
					columns[i].Confidence = 1.0
					break
				}
				// Substring match. Unlike the previous version, do not stop
				// scanning this field's variations here: a later variation may
				// still match exactly (e.g. header "start_date" contains
				// "start" but equals "start_date") and should win with 1.0.
				if strings.Contains(headerLower, variation) && columns[i].Confidence < 0.8 {
					columns[i].MappedTo = fieldName
					columns[i].Confidence = 0.8
				}
			}
			if columns[i].Confidence >= 1.0 {
				break
			}
		}
	}
	return columns
}