chore(qa): PDF QA v3 — 6,259/7,943 controls matched (79%)
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 43s
CI/CD / test-python-backend-compliance (push) Successful in 33s
CI/CD / test-python-document-crawler (push) Successful in 21s
CI/CD / test-python-dsms-gateway (push) Successful in 22s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 43s
CI/CD / test-python-backend-compliance (push) Successful in 33s
CI/CD / test-python-document-crawler (push) Successful in 21s
CI/CD / test-python-dsms-gateway (push) Successful in 22s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped
- Added NIST 800-53, OWASP Top 10/ASVS/SAMM/API/MASVS, ENISA ICS PDFs - Improved normalize() for ligatures, smart quotes, dashes - Added OWASP-specific index builder (A01:2021, V1.1, MASVS-*) - 6,259 article assignments in DB (1,817 article, 1,355 preamble, 1,173 control, 790 annex, 666 section) - Remaining 1,651 unmatched: Blue Guide (EN text vs DE PDF), OWASP multilingual translations (PT/AR/ID/ES) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
104
scripts/qa/apply_pdf_qa_results.py
Normal file
104
scripts/qa/apply_pdf_qa_results.py
Normal file
"""Apply PDF QA results: update source_citation with correct article + article_type.

Reads the QA matcher output from RESULTS_FILE and, for every control whose
stored article/article_type differs, merges the new values into the JSONB
source_citation column. Commits in batches of 500 and prints verification
summaries at the end.
"""
import os
import json
import urllib.parse

import psycopg2

RESULTS_FILE = "/tmp/pdf_qa_results.json"


def _show_samples(cur, article_type):
    """Print up to five controls with the given article_type for spot-checking."""
    cur.execute("""
        SELECT control_id, source_citation->>'article', source_citation->>'article_type',
               source_citation->>'source'
        FROM compliance.canonical_controls
        WHERE source_citation->>'article_type' = %s
        LIMIT 5
    """, (article_type,))
    print(f"\nSample {article_type} controls:")
    for row in cur.fetchall():
        # (row[3] or '') guards against a NULL 'source' key, which would
        # otherwise raise TypeError on the slice.
        print(f" {row[0]}: {row[1]} ({row[2]}) — {(row[3] or '')[:40]}")


# Load results
with open(RESULTS_FILE) as f:
    results = json.load(f)
print(f"Loaded {len(results)} results")

# DB connection from DATABASE_URL (a postgres:// URL); search the
# compliance schema first.
db_url = os.environ['DATABASE_URL']
parsed = urllib.parse.urlparse(db_url)
conn = psycopg2.connect(
    host=parsed.hostname, port=parsed.port or 5432,
    user=parsed.username, password=parsed.password,
    dbname=parsed.path.lstrip('/'),
    options="-c search_path=compliance,public"
)

# Update in batches of 500
cur = conn.cursor()
updated = 0
errors = 0
unchanged = 0

for i, r in enumerate(results):
    ctrl_id = r["ctrl_id"]
    article_label = r["article_label"]
    article_type = r["article_type"]  # preamble, article, annex, section, unknown

    try:
        # A per-row SAVEPOINT confines a failure to this single statement.
        # The previous code called conn.rollback() on error, which silently
        # discarded every already-applied (and already-counted) update in
        # the current 500-row batch.
        cur.execute("SAVEPOINT row_sp")
        # Update source_citation: set article and article_type only when
        # either value actually changes, so rowcount distinguishes
        # updated from unchanged rows.
        cur.execute("""
            UPDATE compliance.canonical_controls
            SET source_citation = source_citation
                || jsonb_build_object('article', %s, 'article_type', %s),
                updated_at = now()
            WHERE id = %s::uuid
              AND (
                source_citation->>'article' IS DISTINCT FROM %s
                OR source_citation->>'article_type' IS DISTINCT FROM %s
              )
        """, (article_label, article_type, ctrl_id, article_label, article_type))
        # Capture rowcount before RELEASE: utility statements reset it.
        changed = cur.rowcount
        cur.execute("RELEASE SAVEPOINT row_sp")

        if changed > 0:
            updated += 1
        else:
            unchanged += 1

    except Exception as e:
        errors += 1
        if errors <= 5:
            print(f" ERROR {ctrl_id}: {str(e)[:100]}")
        # Undo only the failed row; earlier rows in this batch are kept.
        cur.execute("ROLLBACK TO SAVEPOINT row_sp")
        continue

    if (i + 1) % 500 == 0:
        conn.commit()
        print(f" Progress: {i+1}/{len(results)} (updated: {updated}, unchanged: {unchanged}, errors: {errors})")

conn.commit()
print(f"\nDone: {updated} updated, {unchanged} unchanged, {errors} errors out of {len(results)}")

# Verify: count by article_type
cur.execute("""
    SELECT source_citation->>'article_type' as art_type, count(*)
    FROM compliance.canonical_controls
    WHERE source_citation->>'article_type' IS NOT NULL
    GROUP BY 1
    ORDER BY count(*) DESC
""")
print("\nArticle type distribution in DB:")
for row in cur.fetchall():
    print(f" {str(row[0]):12s}: {row[1]:5d}")

# Verify: sample preamble and annex controls
_show_samples(cur, 'preamble')
_show_samples(cur, 'annex')

conn.close()
|
||||||
100
scripts/qa/debug_low_match.py
Normal file
100
scripts/qa/debug_low_match.py
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
"""Debug low match rates for Blue Guide, OWASP Top 10, CISA."""
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import fitz
|
||||||
|
import psycopg2
|
||||||
|
import urllib.parse
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
|
def normalize(s):
    """Collapse PDF text quirks for substring matching.

    Drops soft hyphens and zero-width spaces, expands Latin ligatures,
    straightens smart quotes and en/em dashes, applies NFC, and squeezes
    all whitespace runs to single spaces.
    """
    # Single-pass character mapping (str.translate handles 1:0, 1:1 and
    # 1:many replacements alike).
    table = str.maketrans({
        '\u00ad': '',    # soft hyphen
        '\u200b': '',    # zero-width space
        '\u00a0': ' ',   # non-breaking space
        '\ufb01': 'fi', '\ufb02': 'fl',
        '\ufb00': 'ff', '\ufb03': 'ffi', '\ufb04': 'ffl',
        '\u2019': "'", '\u2018': "'",
        '\u201c': '"', '\u201d': '"',
        '\u2013': '-', '\u2014': '-',
    })
    cleaned = unicodedata.normalize('NFC', s.translate(table))
    return re.sub(r'\s+', ' ', cleaned).strip()
|
||||||
|
|
||||||
|
# Directory holding the downloaded source PDFs.
PDF_DIR = os.path.expanduser("~/rag-ingestion/pdfs")

# Connect via the DATABASE_URL environment variable (postgres URL),
# searching the compliance schema first.
db_url = os.environ['DATABASE_URL']
parsed = urllib.parse.urlparse(db_url)
conn = psycopg2.connect(
    host=parsed.hostname, port=parsed.port or 5432,
    user=parsed.username, password=parsed.password,
    dbname=parsed.path.lstrip('/'),
    options="-c search_path=compliance,public"
)
cur = conn.cursor()

for source, filename in [
    ("EU Blue Guide 2022", "blue_guide_2022.pdf"),
    ("OWASP Top 10 (2021)", "owasp_top10_2021.pdf"),
]:
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"DEBUG: {source}")

    # Extract the full PDF text and normalize it once up front.
    doc = fitz.open(os.path.join(PDF_DIR, filename))
    pdf_text = "".join(page.get_text() for page in doc)
    pdf_norm = normalize(pdf_text)
    print(f" PDF: {len(doc)} pages, {len(pdf_text):,} chars, normalized {len(pdf_norm):,}")

    # Sample controls attributed to this source in the DB.
    cur.execute("""
        SELECT control_id, source_original_text
        FROM compliance.canonical_controls
        WHERE source_citation->>'source' = %s
          AND source_original_text IS NOT NULL
          AND release_state NOT IN ('duplicate', 'too_close')
        LIMIT 8
    """, (source,))

    found = 0
    not_found = 0
    for ctrl_id, orig in cur.fetchall():
        orig_norm = normalize(orig)

        # Same probing strategy as the main matcher: substrings of
        # decreasing length taken from several anchor fractions.
        probes = (
            orig_norm[max(0, int(len(orig_norm) * frac)):
                      max(0, int(len(orig_norm) * frac)) + size]
            for frac in (0.25, 0.1, 0.5, 0.0)
            for size in (80, 60, 40, 30, 20)
        )
        matched = any(len(probe) >= 15 and probe in pdf_norm for probe in probes)

        if matched:
            found += 1
            continue

        not_found += 1
        print(f"\n {ctrl_id}: NOT FOUND")
        # Show what the control text looks like
        print(f" Control (norm, 50-110): '{orig_norm[50:110]}'")
        # Hunt for even a 10-char overlap to see how far the texts diverge.
        for off in range(0, min(len(orig_norm) - 10, 200), 10):
            piece = orig_norm[off:off + 10]
            pos = pdf_norm.find(piece)
            if pos >= 0:
                print(f" Partial found at ctrl[{off}:{off+10}] = '{piece}' → PDF pos {pos}")
                print(f" PDF context: '...{pdf_norm[max(0,pos-20):pos+30]}...'")
                break
        else:
            # No overlap anywhere — the stored text likely came from a
            # different edition/translation of the document.
            print(f" No 10-char match found. Control text may be from a different source.")
            print(f" First 100 chars: '{orig_norm[:100]}'")

    print(f"\n Result: {found} found, {not_found} not found")

conn.close()
|
||||||
@@ -39,22 +39,22 @@ SOURCE_FILE_MAP = {
|
|||||||
"IFRS-Übernahmeverordnung": "ifrs_regulation_2023_1803_de.pdf",
|
"IFRS-Übernahmeverordnung": "ifrs_regulation_2023_1803_de.pdf",
|
||||||
|
|
||||||
# NIST (PDFs)
|
# NIST (PDFs)
|
||||||
"NIST SP 800-53 Rev. 5": None, # TODO: Need to find/download
|
"NIST SP 800-53 Rev. 5": "nist_sp_800_53_r5.pdf",
|
||||||
"NIST SP 800-207 (Zero Trust)": None,
|
"NIST SP 800-207 (Zero Trust)": "nist_sp_800_207.pdf",
|
||||||
"NIST SP 800-63-3": None,
|
"NIST SP 800-63-3": "nist_sp_800_63_3.pdf",
|
||||||
"NIST AI Risk Management Framework": None,
|
"NIST AI Risk Management Framework": "nist_ai_rmf.pdf",
|
||||||
"NIST SP 800-218 (SSDF)": "nist_sp_800_218_ssdf.pdf",
|
"NIST SP 800-218 (SSDF)": "nist_sp_800_218_ssdf.pdf",
|
||||||
"NIST Cybersecurity Framework 2.0": "nist_csf_2_0.pdf",
|
"NIST Cybersecurity Framework 2.0": "nist_csf_2_0.pdf",
|
||||||
|
|
||||||
# OWASP (no PDFs — these are web-based)
|
# OWASP (PDFs)
|
||||||
"OWASP Top 10 (2021)": None,
|
"OWASP Top 10 (2021)": "owasp_top10_2021.pdf",
|
||||||
"OWASP ASVS 4.0": None,
|
"OWASP ASVS 4.0": "owasp_asvs_4_0.pdf",
|
||||||
"OWASP SAMM 2.0": None,
|
"OWASP SAMM 2.0": "owasp_samm_2_0.pdf",
|
||||||
"OWASP API Security Top 10 (2023)": None,
|
"OWASP API Security Top 10 (2023)": "owasp_api_top10_2023.pdf",
|
||||||
"OWASP MASVS 2.0": None,
|
"OWASP MASVS 2.0": "owasp_masvs_2_0.pdf",
|
||||||
|
|
||||||
# ENISA (PDFs)
|
# ENISA (PDFs)
|
||||||
"ENISA ICS/SCADA Dependencies": None,
|
"ENISA ICS/SCADA Dependencies": "enisa_ics_scada.pdf",
|
||||||
"ENISA Supply Chain Good Practices": "enisa_supply_chain_security.pdf",
|
"ENISA Supply Chain Good Practices": "enisa_supply_chain_security.pdf",
|
||||||
"ENISA Threat Landscape Supply Chain": "enisa_supply_chain_security.pdf",
|
"ENISA Threat Landscape Supply Chain": "enisa_supply_chain_security.pdf",
|
||||||
"ENISA Cybersecurity State 2024": None,
|
"ENISA Cybersecurity State 2024": None,
|
||||||
@@ -71,14 +71,14 @@ SOURCE_FILE_MAP = {
|
|||||||
|
|
||||||
# EDPB Guidelines (PDFs)
|
# EDPB Guidelines (PDFs)
|
||||||
"EDPB Leitlinien 01/2022 (BCR)": "edpb_bcr_01_2022.pdf",
|
"EDPB Leitlinien 01/2022 (BCR)": "edpb_bcr_01_2022.pdf",
|
||||||
"EDPB Leitlinien 05/2020 - Einwilligung": None, # txt
|
"EDPB Leitlinien 05/2020 - Einwilligung": "edpb_consent_05_2020.pdf",
|
||||||
"EDPB Leitlinien 08/2020 (Social Media)": "edpb_social_media_08_2020.pdf",
|
"EDPB Leitlinien 08/2020 (Social Media)": "edpb_social_media_08_2020.pdf",
|
||||||
"EDPB Leitlinien 01/2019 (Zertifizierung)": "edpb_certification_01_2019.pdf",
|
"EDPB Leitlinien 01/2019 (Zertifizierung)": "edpb_certification_01_2019.pdf",
|
||||||
"EDPB Leitlinien 07/2020 (Datentransfers)": "edpb_transfers_07_2020.pdf",
|
"EDPB Leitlinien 07/2020 (Datentransfers)": "edpb_transfers_07_2020.pdf",
|
||||||
"EDPB Leitlinien 09/2022 (Data Breach)": "edpb_breach_09_2022.pdf",
|
"EDPB Leitlinien 09/2022 (Data Breach)": "edpb_breach_09_2022.pdf",
|
||||||
"EDPB Leitlinien - Berechtigtes Interesse (Art. 6(1)(f))": "edpb_legitimate_interest.pdf",
|
"EDPB Leitlinien - Berechtigtes Interesse (Art. 6(1)(f))": "edpb_legitimate_interest.pdf",
|
||||||
"EDPB Leitlinien 01/2024 (Berechtigtes Interesse)": "edpb_legitimate_interest.pdf",
|
"EDPB Leitlinien 01/2024 (Berechtigtes Interesse)": "edpb_legitimate_interest.pdf",
|
||||||
"EDPB Leitlinien 04/2019 (Data Protection by Design)": None, # txt
|
"EDPB Leitlinien 04/2019 (Data Protection by Design)": "edpb_dpbd_04_2019.pdf",
|
||||||
"EDPB Leitlinien 01/2020 (Vernetzte Fahrzeuge)": "edpb_connected_vehicles_01_2020.pdf",
|
"EDPB Leitlinien 01/2020 (Vernetzte Fahrzeuge)": "edpb_connected_vehicles_01_2020.pdf",
|
||||||
"EDPB Leitlinien 01/2020 (Datentransfers)": "edpb_transfers_07_2020.pdf",
|
"EDPB Leitlinien 01/2020 (Datentransfers)": "edpb_transfers_07_2020.pdf",
|
||||||
|
|
||||||
@@ -135,10 +135,18 @@ def classify_doc(source_name):
|
|||||||
|
|
||||||
|
|
||||||
def normalize(s):
|
def normalize(s):
|
||||||
"""Remove soft hyphens, normalize whitespace."""
|
"""Remove soft hyphens, normalize whitespace, handle PDF encoding issues."""
|
||||||
s = s.replace('\u00ad', '').replace('\xad', '')
|
s = s.replace('\u00ad', '').replace('\xad', '') # soft hyphen
|
||||||
s = s.replace('\u200b', '').replace('\u00a0', ' ')
|
s = s.replace('\u200b', '').replace('\u00a0', ' ') # zero-width, nbsp
|
||||||
s = s.replace('\ufb01', 'fi').replace('\ufb02', 'fl') # ligatures
|
s = s.replace('\ufb01', 'fi').replace('\ufb02', 'fl') # ligatures
|
||||||
|
s = s.replace('\ufb00', 'ff').replace('\ufb03', 'ffi').replace('\ufb04', 'ffl')
|
||||||
|
s = s.replace('\u2019', "'").replace('\u2018', "'") # smart quotes
|
||||||
|
s = s.replace('\u201c', '"').replace('\u201d', '"')
|
||||||
|
s = s.replace('\u2013', '-').replace('\u2014', '-') # en/em dash
|
||||||
|
s = s.replace('\u2022', '-') # bullet
|
||||||
|
s = s.replace('\u00b7', '-') # middle dot
|
||||||
|
# Remove common PDF artifacts
|
||||||
|
s = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', s) # control chars
|
||||||
s = unicodedata.normalize('NFC', s)
|
s = unicodedata.normalize('NFC', s)
|
||||||
s = re.sub(r'\s+', ' ', s)
|
s = re.sub(r'\s+', ' ', s)
|
||||||
return s.strip()
|
return s.strip()
|
||||||
@@ -248,6 +256,47 @@ def build_nist_index(text):
|
|||||||
return unique
|
return unique
|
||||||
|
|
||||||
|
|
||||||
|
def build_owasp_index(text, source_name):
|
||||||
|
"""Build index for OWASP documents."""
|
||||||
|
items = []
|
||||||
|
|
||||||
|
if "Top 10" in source_name and "API" not in source_name:
|
||||||
|
# OWASP Top 10: A01:2021, A02:2021, etc.
|
||||||
|
for m in re.finditer(r'(A\d{2}:\d{4})', text):
|
||||||
|
items.append((m.start(), m.group(1), "category"))
|
||||||
|
elif "API" in source_name:
|
||||||
|
# OWASP API Top 10: API1:2023, API2:2023, etc.
|
||||||
|
for m in re.finditer(r'(API\d+:\d{4})', text):
|
||||||
|
items.append((m.start(), m.group(1), "category"))
|
||||||
|
elif "ASVS" in source_name:
|
||||||
|
# OWASP ASVS: V1.1, V2.1.1, etc.
|
||||||
|
for m in re.finditer(r'(?:^|\n)\s*(V\d+\.\d+(?:\.\d+)?)\b', text, re.MULTILINE):
|
||||||
|
items.append((m.start(), m.group(1), "requirement"))
|
||||||
|
elif "SAMM" in source_name:
|
||||||
|
# OWASP SAMM: practice names like "Strategy & Metrics", "Education & Guidance"
|
||||||
|
# Use section numbers
|
||||||
|
for m in re.finditer(r'(?:^|\n)\s*(\d+\.\d+(?:\.\d+)?)\s+[A-Z]', text, re.MULTILINE):
|
||||||
|
items.append((m.start(), f"Section {m.group(1)}", "section"))
|
||||||
|
elif "MASVS" in source_name:
|
||||||
|
# OWASP MASVS: MASVS-STORAGE-1, MASVS-CRYPTO-1, etc.
|
||||||
|
for m in re.finditer(r'(MASVS-[A-Z]+-\d+)', text):
|
||||||
|
items.append((m.start(), m.group(1), "requirement"))
|
||||||
|
|
||||||
|
# Fallback: also find generic section numbers
|
||||||
|
if not items:
|
||||||
|
for m in re.finditer(r'(?:^|\n)\s*(\d+\.\d+(?:\.\d+)?)\s+[A-Z]', text, re.MULTILINE):
|
||||||
|
items.append((m.start(), f"Section {m.group(1)}", "section"))
|
||||||
|
|
||||||
|
items.sort(key=lambda x: x[0])
|
||||||
|
seen = set()
|
||||||
|
unique = []
|
||||||
|
for pos, label, typ in items:
|
||||||
|
if label not in seen:
|
||||||
|
seen.add(label)
|
||||||
|
unique.append((pos, label, typ))
|
||||||
|
return unique
|
||||||
|
|
||||||
|
|
||||||
def build_generic_index(text):
|
def build_generic_index(text):
|
||||||
"""Build a generic section index using numbered headings."""
|
"""Build a generic section index using numbered headings."""
|
||||||
items = []
|
items = []
|
||||||
@@ -288,11 +337,11 @@ def find_text_in_doc(orig_text, full_norm, index, index_norm_positions):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
# Try progressively shorter substrings from different positions
|
# Try progressively shorter substrings from different positions
|
||||||
for start_frac in [0.25, 0.1, 0.5, 0.0]:
|
for start_frac in [0.25, 0.1, 0.5, 0.0, 0.75]:
|
||||||
for length in [80, 60, 40, 30]:
|
for length in [80, 60, 40, 30, 20]:
|
||||||
start = max(0, int(len(orig_norm) * start_frac))
|
start = max(0, int(len(orig_norm) * start_frac))
|
||||||
snippet = orig_norm[start:start+length]
|
snippet = orig_norm[start:start+length]
|
||||||
if not snippet or len(snippet) < 25:
|
if not snippet or len(snippet) < 15:
|
||||||
continue
|
continue
|
||||||
pos = full_norm.find(snippet)
|
pos = full_norm.find(snippet)
|
||||||
if pos >= 0:
|
if pos >= 0:
|
||||||
@@ -380,6 +429,8 @@ def main():
|
|||||||
index = build_de_law_index(text)
|
index = build_de_law_index(text)
|
||||||
elif doc_type == "nist":
|
elif doc_type == "nist":
|
||||||
index = build_nist_index(text)
|
index = build_nist_index(text)
|
||||||
|
elif doc_type == "owasp":
|
||||||
|
index = build_owasp_index(text, source_name)
|
||||||
else:
|
else:
|
||||||
index = build_generic_index(text)
|
index = build_generic_index(text)
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user