fix(anchor-finder): use correct Qdrant payload fields (regulation_id, regulation_name_de)
Qdrant collections use regulation_id (not regulation_code), along with regulation_name_de, guideline_name, download_url, etc. Also search the bp_compliance_datenschutz collection, where the OWASP/ENISA docs live.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -32,7 +32,7 @@ EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://embedding-service:8087")
|
|||||||
_OPEN_SOURCE_RULES = {1, 2}
|
_OPEN_SOURCE_RULES = {1, 2}
|
||||||
|
|
||||||
# Collections to search for anchors (open-source frameworks)
|
# Collections to search for anchors (open-source frameworks)
|
||||||
_ANCHOR_COLLECTIONS = ["bp_compliance_ce"]
|
_ANCHOR_COLLECTIONS = ["bp_compliance_ce", "bp_compliance_datenschutz"]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -123,9 +123,11 @@ class AnchorFinder:
|
|||||||
|
|
||||||
for hit in results:
|
for hit in results:
|
||||||
payload = hit.get("payload", {})
|
payload = hit.get("payload", {})
|
||||||
|
# Qdrant payloads use regulation_id (not regulation_code)
|
||||||
regulation_code = (
|
regulation_code = (
|
||||||
payload.get("regulation_code", "")
|
payload.get("regulation_id", "")
|
||||||
or payload.get("metadata", {}).get("regulation_code", "")
|
or payload.get("regulation_code", "")
|
||||||
|
or payload.get("metadata", {}).get("regulation_id", "")
|
||||||
)
|
)
|
||||||
if not regulation_code:
|
if not regulation_code:
|
||||||
continue
|
continue
|
||||||
@@ -136,25 +138,23 @@ class AnchorFinder:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Build reference key for dedup
|
# Build reference key for dedup
|
||||||
article = payload.get("article", "") or payload.get("metadata", {}).get("article", "")
|
article = payload.get("article", "") or payload.get("category", "") or ""
|
||||||
category = payload.get("category", "") or payload.get("metadata", {}).get("category", "")
|
ref = article
|
||||||
ref = article or category or ""
|
|
||||||
key = f"{regulation_code}:{ref}"
|
key = f"{regulation_code}:{ref}"
|
||||||
if key in seen:
|
if key in seen:
|
||||||
continue
|
continue
|
||||||
seen.add(key)
|
seen.add(key)
|
||||||
|
|
||||||
reg_name = (
|
reg_name = (
|
||||||
payload.get("regulation_name", "")
|
payload.get("regulation_name_de", "")
|
||||||
or payload.get("metadata", {}).get("regulation_name", "")
|
or payload.get("regulation_name_en", "")
|
||||||
)
|
or payload.get("guideline_name", "")
|
||||||
reg_short = (
|
|
||||||
payload.get("regulation_short", "")
|
|
||||||
or payload.get("metadata", {}).get("regulation_short", "")
|
|
||||||
)
|
)
|
||||||
|
reg_short = payload.get("regulation_short", "")
|
||||||
source_url = (
|
source_url = (
|
||||||
payload.get("source_url", "")
|
payload.get("download_url", "")
|
||||||
or payload.get("metadata", {}).get("source_url", "")
|
or payload.get("source_url", "")
|
||||||
|
or payload.get("source", "")
|
||||||
)
|
)
|
||||||
|
|
||||||
framework_name = license_info.get("name", reg_name or reg_short or regulation_code)
|
framework_name = license_info.get("name", reg_name or reg_short or regulation_code)
|
||||||
|
|||||||
Reference in New Issue
Block a user