e50892a2aa
CI / detect-changes (push) Successful in 5s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 6s
CI / validate-canonical-controls (push) Successful in 3s
CI / loc-budget (push) Successful in 18s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Successful in 58s
CI / iace-gt-coverage (push) Successful in 15s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
124 lines
5.4 KiB
Go
124 lines
5.4 KiB
Go
package ucca
|
|
|
|
import "strings"
|
|
|
|
// A chunk's source_role names its FUNCTIONAL role:
//
//   - WHAT must be done (the obligation),
//   - HOW to implement it (operational/procedural requirement, control standard,
//     implementation guidance), or
//   - how to READ the norm (interpretation/definition).
//
// The role is ORTHOGONAL to source_class (legal authority): source_class decides
// RANK, whereas source_role decides CONTROL-POOL membership for implementation
// questions. Roles are derived deterministically from markers, so the untagged
// corpus needs no re-tag.
const (
	// roleObligation is the abstract duty (the WHAT).
	roleObligation = "obligation"
	// roleOperationalReq is a concrete binding requirement (e.g. CRA Annex I).
	roleOperationalReq = "operational_requirement"
	// roleProceduralReq is a process: notification, registration, DPIA,
	// incident report.
	roleProceduralReq = "procedural_requirement"
	// roleControlStandard is a best-practice control catalog (NIST/OWASP/ISO/CIS).
	roleControlStandard = "control_standard"
	// roleImplGuidance is advisory how-to material (ENISA good practices, BSI).
	roleImplGuidance = "implementation_guidance"
	// roleInterpretation interprets the norm's MEANING (e.g. an EDPB guideline).
	roleInterpretation = "interpretation"
	// roleDefinition covers definitions, scope and recitals.
	roleDefinition = "definition"
)
|
|
|
|
// Marker lists used by classifyRole. Each list is tested against a lower-cased
// haystack built from the chunk's article label, regulation short name,
// regulation name and article.
var (
	// proceduralMarkers flag a binding_law chunk as a procedural requirement.
	proceduralMarkers = []string{
		"Meldung",
		"Meldepflicht",
		"Notification",
		"Notifizierung",
		"Registrierung",
		"Registration",
		"Konformitätserklärung",
		"Declaration of Conformity",
		"Incident",
		"Berichterstattung",
		"Reporting",
		"Folgenabschätzung",
		"DSFA",
		"DPIA",
		"Anzeigepflicht",
	}

	// annexMarkers and operationalMarkers both flag a binding_law chunk as an
	// operational requirement.
	annexMarkers = []string{
		"Anhang",
		"Annex",
		"Appendix",
		"Anlage",
	}
	operationalMarkers = []string{
		"Anforderung",
		"Requirement",
		"essential",
		"wesentliche",
	}

	// implMarkers separate implementation guidance (how-to) from interpretation
	// within supervisory_guidance.
	implMarkers = []string{
		"Good Practice",
		"Best Practice",
		"Standards Mapping",
		"Umsetzung",
		"Implementation",
		"Handreichung",
		"Maßnahmenkatalog",
		"ICS",
		"SCADA",
		"Technical Guideline",
		"TIG",
	}

	// definitionMarkers flag a binding_law chunk as a definition.
	definitionMarkers = []string{
		"Begriffsbestimmung",
		"Definition",
	}
)
|
|
|
|
// classifyRole derives the functional source_role from chunk metadata + the authority
|
|
// class. technical_standard is always a control_standard; guidance splits into
|
|
// implementation_guidance (how-to) vs interpretation (meaning); binding splits into
|
|
// procedural / operational requirement / definition / plain obligation.
|
|
func classifyRole(r LegalSearchResult) string {
|
|
cls := classifyAuthority(r).sourceClass
|
|
hay := strings.ToLower(r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationName + " " + r.Article)
|
|
switch {
|
|
case r.IsRecital:
|
|
return roleDefinition
|
|
case cls == "technical_standard":
|
|
return roleControlStandard
|
|
case cls == "supervisory_guidance":
|
|
if containsAnyLower(hay, implMarkers) {
|
|
return roleImplGuidance
|
|
}
|
|
return roleInterpretation
|
|
case cls == "binding_law":
|
|
switch {
|
|
case containsAnyLower(hay, definitionMarkers):
|
|
return roleDefinition
|
|
case containsAnyLower(hay, proceduralMarkers):
|
|
return roleProceduralReq
|
|
case containsAnyLower(hay, annexMarkers) || containsAnyLower(hay, operationalMarkers):
|
|
return roleOperationalReq
|
|
default:
|
|
return roleObligation
|
|
}
|
|
default:
|
|
return roleObligation
|
|
}
|
|
}
|
|
|
|
// controlRoleBonus is the soft intra-pool preference (User 2026-06-24):
|
|
// operational_requirement > procedural_requirement > control_standard > implementation_guidance.
|
|
var controlRoleBonus = map[string]float64{
|
|
roleOperationalReq: 0.100,
|
|
roleProceduralReq: 0.075,
|
|
roleControlStandard: 0.050,
|
|
roleImplGuidance: 0.000,
|
|
}
|
|
|
|
// controlPoolGain is the flat lift given to EVERY control-pool role on an
// implementation question. It raises them over the non-control roles
// (obligation/interpretation/definition), so that the binding abstract obligation
// does not dominate by authority alone. The obligation is not removed: it stays
// visible as "Rechtsgrundlage" context below the recommended measures.
const controlPoolGain = 0.15
|
|
|
|
// applyControlRoles boosts the control-pool (the four implementation roles) for an
|
|
// EXPLICIT implementation question, soft-ordered op_req > procedural > standard > guidance.
|
|
// Replaces the earlier "lift technical_standard above binding" — controls are not only
|
|
// technical_standard, and the binding operational_requirement (e.g. CRA Annex I) should win.
|
|
func applyControlRoles(out []LegalSearchResult) {
|
|
for i := range out {
|
|
if bonus, ok := controlRoleBonus[classifyRole(out[i])]; ok {
|
|
out[i].Score += controlPoolGain + bonus
|
|
}
|
|
}
|
|
}
|
|
|
|
// isControlPoolRole reports whether a role belongs to the control-pool surfaced on
|
|
// implementation questions (the four "how to implement" roles).
|
|
func isControlPoolRole(role string) bool {
|
|
switch role {
|
|
case roleOperationalReq, roleProceduralReq, roleControlStandard, roleImplGuidance:
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// controlRoleOf classifies a raw Qdrant payload into a source_role, so searchControls can
|
|
// filter its deep dense pull to the control-pool BEFORE hits are mapped to LegalSearchResult.
|
|
func controlRoleOf(payload map[string]interface{}) string {
|
|
article := getString(payload, "article")
|
|
if article == "" {
|
|
article = getString(payload, "section")
|
|
}
|
|
return classifyRole(LegalSearchResult{
|
|
RegulationShort: getString(payload, "regulation_short"),
|
|
RegulationName: getString(payload, "regulation_name_de"),
|
|
ArticleLabel: getString(payload, "article_label"),
|
|
Article: article,
|
|
Category: getString(payload, "category"),
|
|
SourceClass: getString(payload, "source_class"),
|
|
AuthorityWeight: getInt(payload, "authority_weight"),
|
|
IsRecital: getBool(payload, "is_recital"),
|
|
})
|
|
}
|