package ucca

import "strings"

// source_role is the FUNCTIONAL role of a chunk — WHAT must be done (obligation),
// HOW to implement it (operational/procedural requirement, control standard,
// implementation guidance), or how to READ the norm (interpretation/definition).
// It is ORTHOGONAL to source_class (legal authority): source_class decides RANK,
// source_role decides CONTROL-POOL membership for implementation questions.
// Derived deterministically from markers, so the untagged corpus needs no re-tag.
//
// The constants below are the complete set of role values returned by classifyRole.
const (
	roleObligation      = "obligation"              // the abstract duty (the WHAT)
	roleOperationalReq  = "operational_requirement" // concrete binding requirement (CRA Annex I)
	roleProceduralReq   = "procedural_requirement"  // a process: notification/registration/DPIA/incident report
	roleControlStandard = "control_standard"        // best-practice control catalog (NIST/OWASP/ISO/CIS)
	roleImplGuidance    = "implementation_guidance" // advisory how-to (ENISA good practices, BSI)
	roleInterpretation  = "interpretation"          // interprets the norm's MEANING (EDPB guideline)
	roleDefinition      = "definition"              // definitions / scope / recitals
)

// Marker lists consulted by classifyRole. Each list is handed to containsAnyLower
// together with a lowercased haystack built from the chunk's article label,
// regulation short name, regulation name and article.
var (
	// proceduralMarkers select roleProceduralReq for binding_law chunks
	// (German and English terms for notification, registration, reporting, DPIA, ...).
	proceduralMarkers = []string{
		"Meldung", "Meldepflicht", "Notification", "Notifizierung", "Registrierung",
		"Registration", "Konformitätserklärung", "Declaration of Conformity", "Incident",
		"Berichterstattung", "Reporting", "Folgenabschätzung", "DSFA", "DPIA", "Anzeigepflicht",
	}

	// annexMarkers select roleOperationalReq for binding_law chunks that sit in an annex.
	annexMarkers = []string{"Anhang", "Annex", "Appendix", "Anlage"}

	// operationalMarkers select roleOperationalReq for binding_law chunks that are
	// labelled as requirements.
	operationalMarkers = []string{"Anforderung", "Requirement", "essential", "wesentliche"}

	// implMarkers select roleImplGuidance (instead of roleInterpretation) for
	// supervisory_guidance chunks.
	// NOTE(review): short markers such as "ICS" and "TIG" could also hit inside longer
	// words if containsAnyLower does plain substring matching — confirm against its
	// implementation.
	implMarkers = []string{
		"Good Practice", "Best Practice", "Standards Mapping", "Umsetzung", "Implementation",
		"Handreichung", "Maßnahmenkatalog", "ICS", "SCADA", "Technical Guideline", "TIG",
	}

	// definitionMarkers select roleDefinition for binding_law chunks; classifyRole
	// checks them before the procedural and operational markers.
	definitionMarkers = []string{"Begriffsbestimmung", "Definition"}
)

// classifyRole derives the functional source_role from chunk metadata + the authority
// class.
technical_standard is always a control_standard; guidance splits into // implementation_guidance (how-to) vs interpretation (meaning); binding splits into // procedural / operational requirement / definition / plain obligation. func classifyRole(r LegalSearchResult) string { cls := classifyAuthority(r).sourceClass hay := strings.ToLower(r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationName + " " + r.Article) switch { case r.IsRecital: return roleDefinition case cls == "technical_standard": return roleControlStandard case cls == "supervisory_guidance": if containsAnyLower(hay, implMarkers) { return roleImplGuidance } return roleInterpretation case cls == "binding_law": switch { case containsAnyLower(hay, definitionMarkers): return roleDefinition case containsAnyLower(hay, proceduralMarkers): return roleProceduralReq case containsAnyLower(hay, annexMarkers) || containsAnyLower(hay, operationalMarkers): return roleOperationalReq default: return roleObligation } default: return roleObligation } } // controlRoleBonus is the soft intra-pool preference (User 2026-06-24): // operational_requirement > procedural_requirement > control_standard > implementation_guidance. var controlRoleBonus = map[string]float64{ roleOperationalReq: 0.100, roleProceduralReq: 0.075, roleControlStandard: 0.050, roleImplGuidance: 0.000, } // controlPoolGain lifts EVERY control-pool role over the non-control roles (obligation/ // interpretation/definition) on an implementation question, so the binding abstract // obligation does not dominate by authority alone. The obligation is not removed — it // stays visible as "Rechtsgrundlage" context below the recommended measures. const controlPoolGain = 0.15 // applyControlRoles boosts the control-pool (the four implementation roles) for an // EXPLICIT implementation question, soft-ordered op_req > procedural > standard > guidance. 
// Replaces the earlier "lift technical_standard above binding" — controls are not only // technical_standard, and the binding operational_requirement (e.g. CRA Annex I) should win. func applyControlRoles(out []LegalSearchResult) { for i := range out { if bonus, ok := controlRoleBonus[classifyRole(out[i])]; ok { out[i].Score += controlPoolGain + bonus } } } // isControlPoolRole reports whether a role belongs to the control-pool surfaced on // implementation questions (the four "how to implement" roles). func isControlPoolRole(role string) bool { switch role { case roleOperationalReq, roleProceduralReq, roleControlStandard, roleImplGuidance: return true } return false } // controlRoleOf classifies a raw Qdrant payload into a source_role, so searchControls can // filter its deep dense pull to the control-pool BEFORE hits are mapped to LegalSearchResult. func controlRoleOf(payload map[string]interface{}) string { article := getString(payload, "article") if article == "" { article = getString(payload, "section") } return classifyRole(LegalSearchResult{ RegulationShort: getString(payload, "regulation_short"), RegulationName: getString(payload, "regulation_name_de"), ArticleLabel: getString(payload, "article_label"), Article: article, Category: getString(payload, "category"), SourceClass: getString(payload, "source_class"), AuthorityWeight: getInt(payload, "authority_weight"), IsRecital: getBool(payload, "is_recital"), }) }