fix(anchor-finder): use correct Qdrant payload fields (regulation_id, regulation_name_de)
Qdrant collections use regulation_id (not regulation_code), along with regulation_name_de, guideline_name, download_url, etc. Also search the bp_compliance_datenschutz collection, where the OWASP/ENISA docs live.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -32,7 +32,7 @@ EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://embedding-service:8087")
|
||||
_OPEN_SOURCE_RULES = {1, 2}
|
||||
|
||||
# Collections to search for anchors (open-source frameworks)
|
||||
_ANCHOR_COLLECTIONS = ["bp_compliance_ce"]
|
||||
_ANCHOR_COLLECTIONS = ["bp_compliance_ce", "bp_compliance_datenschutz"]
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -123,9 +123,11 @@ class AnchorFinder:
|
||||
|
||||
for hit in results:
|
||||
payload = hit.get("payload", {})
|
||||
# Qdrant payloads use regulation_id (not regulation_code)
|
||||
regulation_code = (
|
||||
payload.get("regulation_code", "")
|
||||
or payload.get("metadata", {}).get("regulation_code", "")
|
||||
payload.get("regulation_id", "")
|
||||
or payload.get("regulation_code", "")
|
||||
or payload.get("metadata", {}).get("regulation_id", "")
|
||||
)
|
||||
if not regulation_code:
|
||||
continue
|
||||
@@ -136,25 +138,23 @@ class AnchorFinder:
|
||||
continue
|
||||
|
||||
# Build reference key for dedup
|
||||
article = payload.get("article", "") or payload.get("metadata", {}).get("article", "")
|
||||
category = payload.get("category", "") or payload.get("metadata", {}).get("category", "")
|
||||
ref = article or category or ""
|
||||
article = payload.get("article", "") or payload.get("category", "") or ""
|
||||
ref = article
|
||||
key = f"{regulation_code}:{ref}"
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
|
||||
reg_name = (
|
||||
payload.get("regulation_name", "")
|
||||
or payload.get("metadata", {}).get("regulation_name", "")
|
||||
)
|
||||
reg_short = (
|
||||
payload.get("regulation_short", "")
|
||||
or payload.get("metadata", {}).get("regulation_short", "")
|
||||
payload.get("regulation_name_de", "")
|
||||
or payload.get("regulation_name_en", "")
|
||||
or payload.get("guideline_name", "")
|
||||
)
|
||||
reg_short = payload.get("regulation_short", "")
|
||||
source_url = (
|
||||
payload.get("source_url", "")
|
||||
or payload.get("metadata", {}).get("source_url", "")
|
||||
payload.get("download_url", "")
|
||||
or payload.get("source_url", "")
|
||||
or payload.get("source", "")
|
||||
)
|
||||
|
||||
framework_name = license_info.get("name", reg_name or reg_short or regulation_code)
|
||||
|
||||
Reference in New Issue
Block a user