Compare commits
2 Commits
751f4a5ee7
...
4bfb438c92
| Author | SHA1 | Date | |
|---|---|---|---|
| 4bfb438c92 | |||
| 0371eecc03 |
@@ -11,7 +11,7 @@ const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:80
|
|||||||
export async function POST(request: NextRequest) {
|
export async function POST(request: NextRequest) {
|
||||||
try {
|
try {
|
||||||
const body = await request.json()
|
const body = await request.json()
|
||||||
const { url } = body
|
const { url, categories = [] } = body
|
||||||
|
|
||||||
if (!url) {
|
if (!url) {
|
||||||
return NextResponse.json({ error: 'URL erforderlich' }, { status: 400 })
|
return NextResponse.json({ error: 'URL erforderlich' }, { status: 400 })
|
||||||
@@ -21,7 +21,7 @@ export async function POST(request: NextRequest) {
|
|||||||
const response = await fetch(`${BACKEND_URL}/api/compliance/agent/banner-check`, {
|
const response = await fetch(`${BACKEND_URL}/api/compliance/agent/banner-check`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ url }),
|
body: JSON.stringify({ url, categories }),
|
||||||
signal: AbortSignal.timeout(120000), // 2 min for Playwright
|
signal: AbortSignal.timeout(120000), // 2 min for Playwright
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -33,12 +33,34 @@ interface BannerResult {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const CATEGORIES = [
|
||||||
|
{ id: 'all', label: 'Alle Kategorien' },
|
||||||
|
{ id: 'necessary', label: 'Notwendig' },
|
||||||
|
{ id: 'statistics', label: 'Statistik' },
|
||||||
|
{ id: 'marketing', label: 'Marketing' },
|
||||||
|
{ id: 'functional', label: 'Funktional' },
|
||||||
|
{ id: 'preferences', label: 'Praeferenzen' },
|
||||||
|
]
|
||||||
|
|
||||||
export function BannerCheckTab() {
|
export function BannerCheckTab() {
|
||||||
const [url, setUrl] = useState('')
|
const [url, setUrl] = useState('')
|
||||||
const [loading, setLoading] = useState(false)
|
const [loading, setLoading] = useState(false)
|
||||||
const [progress, setProgress] = useState('')
|
const [progress, setProgress] = useState('')
|
||||||
const [error, setError] = useState<string | null>(null)
|
const [error, setError] = useState<string | null>(null)
|
||||||
const [result, setResult] = useState<BannerResult | null>(null)
|
const [result, setResult] = useState<BannerResult | null>(null)
|
||||||
|
const [categories, setCategories] = useState<string[]>(['all'])
|
||||||
|
|
||||||
|
const toggleCategory = (id: string) => {
|
||||||
|
if (id === 'all') {
|
||||||
|
setCategories(['all'])
|
||||||
|
return
|
||||||
|
}
|
||||||
|
setCategories(prev => {
|
||||||
|
const without = prev.filter(c => c !== 'all' && c !== id)
|
||||||
|
const next = prev.includes(id) ? without : [...without, id]
|
||||||
|
return next.length === 0 ? ['all'] : next
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
const handleScan = async (e: React.FormEvent) => {
|
const handleScan = async (e: React.FormEvent) => {
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
@@ -49,11 +71,16 @@ export function BannerCheckTab() {
|
|||||||
setResult(null)
|
setResult(null)
|
||||||
setProgress('Cookie-Banner wird analysiert...')
|
setProgress('Cookie-Banner wird analysiert...')
|
||||||
|
|
||||||
|
// 'all' selected = empty array (test everything)
|
||||||
|
const selectedCategories = categories.includes('all')
|
||||||
|
? []
|
||||||
|
: categories
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch('/api/sdk/v1/agent/banner-check', {
|
const res = await fetch('/api/sdk/v1/agent/banner-check', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ url: url.trim() }),
|
body: JSON.stringify({ url: url.trim(), categories: selectedCategories }),
|
||||||
})
|
})
|
||||||
if (!res.ok) throw new Error(`Fehler: ${res.status}`)
|
if (!res.ok) throw new Error(`Fehler: ${res.status}`)
|
||||||
const data = await res.json()
|
const data = await res.json()
|
||||||
@@ -94,7 +121,8 @@ export function BannerCheckTab() {
|
|||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<form onSubmit={handleScan} className="flex gap-3">
|
<form onSubmit={handleScan} className="space-y-3">
|
||||||
|
<div className="flex gap-3">
|
||||||
<input
|
<input
|
||||||
type="url" value={url} onChange={e => setUrl(e.target.value)}
|
type="url" value={url} onChange={e => setUrl(e.target.value)}
|
||||||
placeholder="https://www.example.com/"
|
placeholder="https://www.example.com/"
|
||||||
@@ -102,7 +130,7 @@ export function BannerCheckTab() {
|
|||||||
disabled={loading} required
|
disabled={loading} required
|
||||||
/>
|
/>
|
||||||
<button type="submit" disabled={loading || !url.trim()}
|
<button type="submit" disabled={loading || !url.trim()}
|
||||||
className="px-6 py-3 bg-purple-600 text-white rounded-lg hover:bg-purple-700 disabled:opacity-50 transition-colors flex items-center gap-2 text-sm font-medium">
|
className="px-6 py-3 bg-purple-600 text-white rounded-lg hover:bg-purple-700 disabled:opacity-50 transition-colors flex items-center gap-2 text-sm font-medium whitespace-nowrap">
|
||||||
{loading ? (
|
{loading ? (
|
||||||
<><svg className="animate-spin w-4 h-4" fill="none" viewBox="0 0 24 24">
|
<><svg className="animate-spin w-4 h-4" fill="none" viewBox="0 0 24 24">
|
||||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||||
@@ -110,6 +138,38 @@ export function BannerCheckTab() {
|
|||||||
</svg>Pruefe...</>
|
</svg>Pruefe...</>
|
||||||
) : 'Banner pruefen'}
|
) : 'Banner pruefen'}
|
||||||
</button>
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className="flex flex-wrap gap-2">
|
||||||
|
{CATEGORIES.map(cat => (
|
||||||
|
<label key={cat.id}
|
||||||
|
className={`inline-flex items-center gap-1.5 px-3 py-1.5 rounded-full text-xs font-medium cursor-pointer border transition-colors ${
|
||||||
|
categories.includes(cat.id)
|
||||||
|
? 'bg-purple-100 border-purple-300 text-purple-800'
|
||||||
|
: 'bg-gray-50 border-gray-200 text-gray-600 hover:bg-gray-100'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
checked={categories.includes(cat.id)}
|
||||||
|
onChange={() => toggleCategory(cat.id)}
|
||||||
|
className="sr-only"
|
||||||
|
/>
|
||||||
|
<span className={`w-3 h-3 rounded-sm border flex items-center justify-center ${
|
||||||
|
categories.includes(cat.id)
|
||||||
|
? 'bg-purple-600 border-purple-600'
|
||||||
|
: 'border-gray-400'
|
||||||
|
}`}>
|
||||||
|
{categories.includes(cat.id) && (
|
||||||
|
<svg className="w-2 h-2 text-white" fill="currentColor" viewBox="0 0 12 12">
|
||||||
|
<path d="M10 3L4.5 8.5 2 6" stroke="currentColor" strokeWidth="2" fill="none" strokeLinecap="round" strokeLinejoin="round" />
|
||||||
|
</svg>
|
||||||
|
)}
|
||||||
|
</span>
|
||||||
|
{cat.label}
|
||||||
|
</label>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
{progress && (
|
{progress && (
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
|
|
||||||
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
|
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/google/uuid"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ParseNarrativeRequest is the request body for POST /projects/:id/parse-narrative.
|
// ParseNarrativeRequest is the request body for POST /projects/:id/parse-narrative.
|
||||||
@@ -43,8 +44,16 @@ func (h *IACEHandler) ParseNarrative(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1. Parse narrative text deterministically
|
// Load project to get machine type for context-aware parsing
|
||||||
parseResult := iace.ParseNarrative(req.NarrativeText)
|
var machineType string
|
||||||
|
if projectID, err := uuid.Parse(c.Param("id")); err == nil {
|
||||||
|
if project, err := h.store.GetProject(c.Request.Context(), projectID); err == nil && project != nil {
|
||||||
|
machineType = project.MachineType
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1. Parse narrative text deterministically (machine-type-aware)
|
||||||
|
parseResult := iace.ParseNarrative(req.NarrativeText, machineType)
|
||||||
|
|
||||||
// 2. Feed parsed tags into pattern engine
|
// 2. Feed parsed tags into pattern engine
|
||||||
// Collect all component IDs for tag resolution
|
// Collect all component IDs for tag resolution
|
||||||
|
|||||||
@@ -27,4 +27,9 @@ type HazardPattern struct {
|
|||||||
ZoneDE string `json:"zone_de,omitempty"` // Gefahrstelle/Zone
|
ZoneDE string `json:"zone_de,omitempty"` // Gefahrstelle/Zone
|
||||||
DefaultSeverity int `json:"default_severity,omitempty"` // 1-5
|
DefaultSeverity int `json:"default_severity,omitempty"` // 1-5
|
||||||
DefaultExposure int `json:"default_exposure,omitempty"` // 1-5
|
DefaultExposure int `json:"default_exposure,omitempty"` // 1-5
|
||||||
|
// MachineTypes restricts this pattern to specific machine types.
|
||||||
|
// Empty = fires for all machine types. If set, only fires when the
|
||||||
|
// project's machine_type is in this list. Prevents e.g. press-specific
|
||||||
|
// patterns from firing for a cobot project.
|
||||||
|
MachineTypes []string `json:"machine_types,omitempty"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Toedliche Quetschverletzung, Amputation von Gliedmassen.",
|
HarmDE: "Toedliche Quetschverletzung, Amputation von Gliedmassen.",
|
||||||
AffectedDE: "Einrichter, Bedienpersonal im Werkzeugeinbauraum.",
|
AffectedDE: "Einrichter, Bedienpersonal im Werkzeugeinbauraum.",
|
||||||
ZoneDE: "Werkzeugeinbauraum unterhalb des Stoessels.",
|
ZoneDE: "Werkzeugeinbauraum unterhalb des Stoessels.",
|
||||||
DefaultSeverity: 5, DefaultExposure: 2,
|
DefaultSeverity: 5, DefaultExposure: 2, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP046", NameDE: "Quetschen im Werkzeugeinbauraum", NameEN: "Crushing in die space",
|
ID: "HP046", NameDE: "Quetschen im Werkzeugeinbauraum", NameEN: "Crushing in die space",
|
||||||
@@ -38,7 +38,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Toedliche Quetschverletzung, Amputation der oberen Extremitaeten.",
|
HarmDE: "Toedliche Quetschverletzung, Amputation der oberen Extremitaeten.",
|
||||||
AffectedDE: "Einrichter, Werkzeugbauer, Instandhaltungspersonal.",
|
AffectedDE: "Einrichter, Werkzeugbauer, Instandhaltungspersonal.",
|
||||||
ZoneDE: "Werkzeugeinbauraum zwischen Ober- und Unterwerkzeug.",
|
ZoneDE: "Werkzeugeinbauraum zwischen Ober- und Unterwerkzeug.",
|
||||||
DefaultSeverity: 5, DefaultExposure: 3,
|
DefaultSeverity: 5, DefaultExposure: 3, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP047", NameDE: "Oelnebelexposition Atemwege", NameEN: "Oil mist inhalation exposure",
|
ID: "HP047", NameDE: "Oelnebelexposition Atemwege", NameEN: "Oil mist inhalation exposure",
|
||||||
@@ -53,7 +53,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Atemwegsreizung, chronische Lungenerkrankung bei Langzeitexposition.",
|
HarmDE: "Atemwegsreizung, chronische Lungenerkrankung bei Langzeitexposition.",
|
||||||
AffectedDE: "Bedienpersonal, Personen im Nahbereich der Presse.",
|
AffectedDE: "Bedienpersonal, Personen im Nahbereich der Presse.",
|
||||||
ZoneDE: "Arbeitsbereich rund um die Presse, insbesondere Bedienerseite.",
|
ZoneDE: "Arbeitsbereich rund um die Presse, insbesondere Bedienerseite.",
|
||||||
DefaultSeverity: 3, DefaultExposure: 4,
|
DefaultSeverity: 3, DefaultExposure: 4, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP048", NameDE: "Verbrennung durch heisse Werkstuecke", NameEN: "Burns from hot workpieces",
|
ID: "HP048", NameDE: "Verbrennung durch heisse Werkstuecke", NameEN: "Burns from hot workpieces",
|
||||||
@@ -68,7 +68,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Verbrennungen zweiten oder dritten Grades an Haenden und Unterarmen.",
|
HarmDE: "Verbrennungen zweiten oder dritten Grades an Haenden und Unterarmen.",
|
||||||
AffectedDE: "Bedienpersonal, Einrichter bei Werkzeugwechsel.",
|
AffectedDE: "Bedienpersonal, Einrichter bei Werkzeugwechsel.",
|
||||||
ZoneDE: "Entnahmebereich, Werkzeugeinbauraum, Ablagetisch.",
|
ZoneDE: "Entnahmebereich, Werkzeugeinbauraum, Ablagetisch.",
|
||||||
DefaultSeverity: 4, DefaultExposure: 3,
|
DefaultSeverity: 4, DefaultExposure: 3, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP049", NameDE: "Schwebende Last (Hubwerk/Aufzug)", NameEN: "Suspended load (hoist/elevator)",
|
ID: "HP049", NameDE: "Schwebende Last (Hubwerk/Aufzug)", NameEN: "Suspended load (hoist/elevator)",
|
||||||
@@ -83,7 +83,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Toedliche Verletzung durch herabfallende Last, Knochenbrueche.",
|
HarmDE: "Toedliche Verletzung durch herabfallende Last, Knochenbrueche.",
|
||||||
AffectedDE: "Personen im Gefahrenbereich unter der schwebenden Last.",
|
AffectedDE: "Personen im Gefahrenbereich unter der schwebenden Last.",
|
||||||
ZoneDE: "Bereich unterhalb des Hubwerks, Werkzeugwechselzone.",
|
ZoneDE: "Bereich unterhalb des Hubwerks, Werkzeugwechselzone.",
|
||||||
DefaultSeverity: 5, DefaultExposure: 2,
|
DefaultSeverity: 5, DefaultExposure: 2, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP050", NameDE: "Einziehen/Scheren Transfersystem", NameEN: "Draw-in/shearing at transfer system",
|
ID: "HP050", NameDE: "Einziehen/Scheren Transfersystem", NameEN: "Draw-in/shearing at transfer system",
|
||||||
@@ -98,7 +98,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Schnittverletzungen, Amputation von Fingern, Quetschungen.",
|
HarmDE: "Schnittverletzungen, Amputation von Fingern, Quetschungen.",
|
||||||
AffectedDE: "Bedienpersonal, Einrichter bei Stoerungsbeseitigung.",
|
AffectedDE: "Bedienpersonal, Einrichter bei Stoerungsbeseitigung.",
|
||||||
ZoneDE: "Transferbereich zwischen den Pressenstationen.",
|
ZoneDE: "Transferbereich zwischen den Pressenstationen.",
|
||||||
DefaultSeverity: 4, DefaultExposure: 3,
|
DefaultSeverity: 4, DefaultExposure: 3, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP051", NameDE: "Sturzgefahr Auswurfbereich", NameEN: "Fall hazard at ejection area",
|
ID: "HP051", NameDE: "Sturzgefahr Auswurfbereich", NameEN: "Fall hazard at ejection area",
|
||||||
@@ -114,7 +114,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Knochenbrueche, Prellungen, Kopfverletzungen bei Sturz.",
|
HarmDE: "Knochenbrueche, Prellungen, Kopfverletzungen bei Sturz.",
|
||||||
AffectedDE: "Bedienpersonal, Logistikmitarbeiter im Auswurfbereich.",
|
AffectedDE: "Bedienpersonal, Logistikmitarbeiter im Auswurfbereich.",
|
||||||
ZoneDE: "Auswurfschacht und angrenzender Bodenbereich.",
|
ZoneDE: "Auswurfschacht und angrenzender Bodenbereich.",
|
||||||
DefaultSeverity: 3, DefaultExposure: 4,
|
DefaultSeverity: 3, DefaultExposure: 4, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP052", NameDE: "Druckfreisetzung Hydraulikspeicher", NameEN: "Pressure release from hydraulic accumulator",
|
ID: "HP052", NameDE: "Druckfreisetzung Hydraulikspeicher", NameEN: "Pressure release from hydraulic accumulator",
|
||||||
@@ -129,7 +129,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Schwere Schnittverletzungen durch Oelstrahl, Augenverletzungen, Verbrennungen.",
|
HarmDE: "Schwere Schnittverletzungen durch Oelstrahl, Augenverletzungen, Verbrennungen.",
|
||||||
AffectedDE: "Instandhaltungspersonal, Hydraulik-Fachkraefte.",
|
AffectedDE: "Instandhaltungspersonal, Hydraulik-Fachkraefte.",
|
||||||
ZoneDE: "Hydraulikaggregat, Speicherbereich, Leitungsfuehrung.",
|
ZoneDE: "Hydraulikaggregat, Speicherbereich, Leitungsfuehrung.",
|
||||||
DefaultSeverity: 5, DefaultExposure: 2,
|
DefaultSeverity: 5, DefaultExposure: 2, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP053", NameDE: "Impulslaerm Pressvorgang", NameEN: "Impact noise during press operation",
|
ID: "HP053", NameDE: "Impulslaerm Pressvorgang", NameEN: "Impact noise during press operation",
|
||||||
@@ -144,7 +144,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Laermschwerhoerigkeit, Tinnitus bei Langzeitexposition.",
|
HarmDE: "Laermschwerhoerigkeit, Tinnitus bei Langzeitexposition.",
|
||||||
AffectedDE: "Bedienpersonal, Personen in angrenzenden Arbeitsbereichen.",
|
AffectedDE: "Bedienpersonal, Personen in angrenzenden Arbeitsbereichen.",
|
||||||
ZoneDE: "Gesamter Pressenbereich, Radius ca. 5-10 m um die Maschine.",
|
ZoneDE: "Gesamter Pressenbereich, Radius ca. 5-10 m um die Maschine.",
|
||||||
DefaultSeverity: 3, DefaultExposure: 5,
|
DefaultSeverity: 3, DefaultExposure: 5, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP054", NameDE: "Schwungrad-Restenergie nach Abschaltung", NameEN: "Flywheel residual energy after shutdown",
|
ID: "HP054", NameDE: "Schwungrad-Restenergie nach Abschaltung", NameEN: "Flywheel residual energy after shutdown",
|
||||||
@@ -159,7 +159,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Erfassen durch rotierende Teile, schwere Schnittverletzungen, Skalpierung.",
|
HarmDE: "Erfassen durch rotierende Teile, schwere Schnittverletzungen, Skalpierung.",
|
||||||
AffectedDE: "Instandhaltungspersonal, Einrichter nach Maschinenstopp.",
|
AffectedDE: "Instandhaltungspersonal, Einrichter nach Maschinenstopp.",
|
||||||
ZoneDE: "Schwungradbereich, Kupplungsraum, Antriebsseite der Presse.",
|
ZoneDE: "Schwungradbereich, Kupplungsraum, Antriebsseite der Presse.",
|
||||||
DefaultSeverity: 4, DefaultExposure: 2,
|
DefaultSeverity: 4, DefaultExposure: 2, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP055", NameDE: "Umgehung Schutzeinrichtung (Pressentuer)", NameEN: "Bypass of safety guard (press door)",
|
ID: "HP055", NameDE: "Umgehung Schutzeinrichtung (Pressentuer)", NameEN: "Bypass of safety guard (press door)",
|
||||||
@@ -174,7 +174,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Toedliche Quetsch- oder Scherverletzungen bei Eingriff in den Gefahrenbereich.",
|
HarmDE: "Toedliche Quetsch- oder Scherverletzungen bei Eingriff in den Gefahrenbereich.",
|
||||||
AffectedDE: "Bedienpersonal, Einrichter bei Stoerungsbeseitigung.",
|
AffectedDE: "Bedienpersonal, Einrichter bei Stoerungsbeseitigung.",
|
||||||
ZoneDE: "Gesamter Werkzeugeinbauraum hinter der Schutztuer.",
|
ZoneDE: "Gesamter Werkzeugeinbauraum hinter der Schutztuer.",
|
||||||
DefaultSeverity: 5, DefaultExposure: 3,
|
DefaultSeverity: 5, DefaultExposure: 3, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP056", NameDE: "Fehlbedienung Zweihandschaltung", NameEN: "Two-hand control misoperation",
|
ID: "HP056", NameDE: "Fehlbedienung Zweihandschaltung", NameEN: "Two-hand control misoperation",
|
||||||
@@ -189,7 +189,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Quetschverletzungen der freien Hand im Werkzeugbereich.",
|
HarmDE: "Quetschverletzungen der freien Hand im Werkzeugbereich.",
|
||||||
AffectedDE: "Bedienpersonal an der Pressenbedienung.",
|
AffectedDE: "Bedienpersonal an der Pressenbedienung.",
|
||||||
ZoneDE: "Gefahrenbereich zwischen Ober- und Unterwerkzeug.",
|
ZoneDE: "Gefahrenbereich zwischen Ober- und Unterwerkzeug.",
|
||||||
DefaultSeverity: 5, DefaultExposure: 3,
|
DefaultSeverity: 5, DefaultExposure: 3, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP057", NameDE: "Hydraulikoelleckage + Rutschgefahr", NameEN: "Hydraulic oil leakage + slip hazard",
|
ID: "HP057", NameDE: "Hydraulikoelleckage + Rutschgefahr", NameEN: "Hydraulic oil leakage + slip hazard",
|
||||||
@@ -204,7 +204,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Sturzverletzungen durch Ausrutschen, Hautreizungen bei Hautkontakt.",
|
HarmDE: "Sturzverletzungen durch Ausrutschen, Hautreizungen bei Hautkontakt.",
|
||||||
AffectedDE: "Bedienpersonal, Logistikmitarbeiter, alle Personen im Pressenbereich.",
|
AffectedDE: "Bedienpersonal, Logistikmitarbeiter, alle Personen im Pressenbereich.",
|
||||||
ZoneDE: "Bodenbereich rund um das Hydraulikaggregat und unter der Presse.",
|
ZoneDE: "Bodenbereich rund um das Hydraulikaggregat und unter der Presse.",
|
||||||
DefaultSeverity: 2, DefaultExposure: 4,
|
DefaultSeverity: 2, DefaultExposure: 4, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP058", NameDE: "Ergonomische Belastung Kistenwechsel", NameEN: "Ergonomic strain during bin changeover",
|
ID: "HP058", NameDE: "Ergonomische Belastung Kistenwechsel", NameEN: "Ergonomic strain during bin changeover",
|
||||||
@@ -219,7 +219,7 @@ func GetPressHazardPatterns() []HazardPattern {
|
|||||||
HarmDE: "Rueckenverletzungen, Bandscheibenvorfall, Muskel-Skelett-Erkrankungen.",
|
HarmDE: "Rueckenverletzungen, Bandscheibenvorfall, Muskel-Skelett-Erkrankungen.",
|
||||||
AffectedDE: "Bedienpersonal, Logistikmitarbeiter an der Presse.",
|
AffectedDE: "Bedienpersonal, Logistikmitarbeiter an der Presse.",
|
||||||
ZoneDE: "Auswurfbereich, Palettenstellplatz neben der Presse.",
|
ZoneDE: "Auswurfbereich, Palettenstellplatz neben der Presse.",
|
||||||
DefaultSeverity: 2, DefaultExposure: 5,
|
DefaultSeverity: 2, DefaultExposure: 5, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,10 @@ type KeywordEntry struct {
|
|||||||
ComponentIDs []string // Matched component library IDs (C001-C135)
|
ComponentIDs []string // Matched component library IDs (C001-C135)
|
||||||
EnergyIDs []string // Matched energy source IDs (EN01-EN20)
|
EnergyIDs []string // Matched energy source IDs (EN01-EN20)
|
||||||
ExtraTags []string // Additional tags derived from keyword context
|
ExtraTags []string // Additional tags derived from keyword context
|
||||||
|
// MachineTypes restricts this keyword to specific machine types.
|
||||||
|
// Empty = matches all machine types. If set, only matches when the
|
||||||
|
// project's machine_type is in this list.
|
||||||
|
MachineTypes []string // e.g. ["press", "hydraulic_press"]
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetKeywordDictionary returns the complete keyword dictionary for
|
// GetKeywordDictionary returns the complete keyword dictionary for
|
||||||
@@ -14,13 +18,13 @@ type KeywordEntry struct {
|
|||||||
// machinery terminology in German and English.
|
// machinery terminology in German and English.
|
||||||
func GetKeywordDictionary() []KeywordEntry {
|
func GetKeywordDictionary() []KeywordEntry {
|
||||||
return []KeywordEntry{
|
return []KeywordEntry{
|
||||||
// ── Pressen / Umformmaschinen ───────────────────────────────────
|
// ── Pressen / Umformmaschinen (NUR fuer press/hydraulic_press) ──
|
||||||
{Keywords: []string{"presse", "press", "umform", "umformung"}, ComponentIDs: []string{"C008", "C122"}, EnergyIDs: []string{"EN01"}, ExtraTags: []string{"high_force", "crush_point"}},
|
{Keywords: []string{"presse", "press", "umform", "umformung"}, ComponentIDs: []string{"C008", "C122"}, EnergyIDs: []string{"EN01"}, ExtraTags: []string{"high_force", "crush_point"}, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press", "stamping_press"}},
|
||||||
{Keywords: []string{"kniehebel", "toggle"}, ComponentIDs: []string{"C121"}, ExtraTags: []string{"mechanical_transmission"}},
|
{Keywords: []string{"kniehebel", "toggle"}, ComponentIDs: []string{"C121"}, ExtraTags: []string{"mechanical_transmission"}, MachineTypes: []string{"press"}},
|
||||||
{Keywords: []string{"stossel", "stoessel", "ram", "slide"}, ComponentIDs: []string{"C122"}, EnergyIDs: []string{"EN01"}, ExtraTags: []string{"moving_part", "crush_point", "gravity_risk"}},
|
{Keywords: []string{"stossel", "stoessel", "ram", "slide"}, ComponentIDs: []string{"C122"}, EnergyIDs: []string{"EN01"}, ExtraTags: []string{"moving_part", "crush_point", "gravity_risk"}, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"}},
|
||||||
{Keywords: []string{"stempel", "punch", "matrize", "die"}, ComponentIDs: []string{"C126"}, ExtraTags: []string{"crush_point", "cutting_part"}},
|
{Keywords: []string{"stempel", "punch", "matrize", "die"}, ComponentIDs: []string{"C126"}, ExtraTags: []string{"crush_point", "cutting_part"}, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press", "stamping_press"}},
|
||||||
{Keywords: []string{"schwungrad", "flywheel"}, ComponentIDs: []string{"C133"}, EnergyIDs: []string{"EN02", "EN03"}, ExtraTags: []string{"stored_energy", "rotating_part"}},
|
{Keywords: []string{"schwungrad", "flywheel"}, ComponentIDs: []string{"C133"}, EnergyIDs: []string{"EN02", "EN03"}, ExtraTags: []string{"stored_energy", "rotating_part"}, MachineTypes: []string{"press", "mechanical_press"}},
|
||||||
{Keywords: []string{"werkzeugeinbauraum", "die space"}, ComponentIDs: []string{"C132"}, ExtraTags: []string{"crush_point", "pinch_point"}},
|
{Keywords: []string{"werkzeugeinbauraum", "die space"}, ComponentIDs: []string{"C132"}, ExtraTags: []string{"crush_point", "pinch_point"}, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"}},
|
||||||
|
|
||||||
// ── Foerdertechnik ──────────────────────────────────────────────
|
// ── Foerdertechnik ──────────────────────────────────────────────
|
||||||
{Keywords: []string{"foerderband", "transportband", "conveyor"}, ComponentIDs: []string{"C003"}, EnergyIDs: []string{"EN01", "EN02"}, ExtraTags: []string{"entanglement_risk"}},
|
{Keywords: []string{"foerderband", "transportband", "conveyor"}, ComponentIDs: []string{"C003"}, EnergyIDs: []string{"EN01", "EN02"}, ExtraTags: []string{"entanglement_risk"}},
|
||||||
|
|||||||
@@ -94,7 +94,9 @@ var roleKeywords = map[string]string{
|
|||||||
// ParseNarrative extracts components, energy sources, lifecycle phases,
|
// ParseNarrative extracts components, energy sources, lifecycle phases,
|
||||||
// roles, and tags from a machine description text. Fully deterministic,
|
// roles, and tags from a machine description text. Fully deterministic,
|
||||||
// no LLM required.
|
// no LLM required.
|
||||||
func ParseNarrative(text string) ParseResult {
|
// machineType is optional — if provided, keywords with MachineTypes
|
||||||
|
// restrictions are only matched when the machine type is in the list.
|
||||||
|
func ParseNarrative(text string, machineType ...string) ParseResult {
|
||||||
result := ParseResult{}
|
result := ParseResult{}
|
||||||
if text == "" {
|
if text == "" {
|
||||||
return result
|
return result
|
||||||
@@ -122,7 +124,27 @@ func ParseNarrative(text string) ParseResult {
|
|||||||
seenEnergy := make(map[string]bool)
|
seenEnergy := make(map[string]bool)
|
||||||
tagSet := make(map[string]bool)
|
tagSet := make(map[string]bool)
|
||||||
|
|
||||||
|
// Resolve machine type for filtering
|
||||||
|
var mType string
|
||||||
|
if len(machineType) > 0 {
|
||||||
|
mType = machineType[0]
|
||||||
|
}
|
||||||
|
|
||||||
for _, entry := range dictionary {
|
for _, entry := range dictionary {
|
||||||
|
// Skip keywords restricted to other machine types
|
||||||
|
if len(entry.MachineTypes) > 0 && mType != "" {
|
||||||
|
matched := false
|
||||||
|
for _, mt := range entry.MachineTypes {
|
||||||
|
if mt == mType {
|
||||||
|
matched = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !matched {
|
||||||
|
continue // This keyword is for a different machine type
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for _, kw := range entry.Keywords {
|
for _, kw := range entry.Keywords {
|
||||||
kwNorm := strings.ToLower(kw)
|
kwNorm := strings.ToLower(kw)
|
||||||
kwNorm = strings.ReplaceAll(kwNorm, "ä", "ae")
|
kwNorm = strings.ReplaceAll(kwNorm, "ä", "ae")
|
||||||
|
|||||||
@@ -94,6 +94,7 @@ class DocCheckStatusResponse(BaseModel):
|
|||||||
|
|
||||||
class BannerCheckRequest(BaseModel):
|
class BannerCheckRequest(BaseModel):
|
||||||
url: str
|
url: str
|
||||||
|
categories: list[str] = [] # empty = test all categories
|
||||||
|
|
||||||
|
|
||||||
@router.post("/banner-check")
|
@router.post("/banner-check")
|
||||||
@@ -103,7 +104,11 @@ async def run_banner_check(req: BannerCheckRequest):
|
|||||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||||
resp = await client.post(
|
resp = await client.post(
|
||||||
f"{CONSENT_TESTER_URL}/scan",
|
f"{CONSENT_TESTER_URL}/scan",
|
||||||
json={"url": req.url, "timeout_per_phase": 10},
|
json={
|
||||||
|
"url": req.url,
|
||||||
|
"timeout_per_phase": 10,
|
||||||
|
"categories": req.categories,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
if resp.status_code == 200:
|
if resp.status_code == 200:
|
||||||
return resp.json()
|
return resp.json()
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ app.add_middleware(
|
|||||||
class ScanRequest(BaseModel):
|
class ScanRequest(BaseModel):
|
||||||
url: str
|
url: str
|
||||||
timeout_per_phase: int = 10 # seconds to wait after page load
|
timeout_per_phase: int = 10 # seconds to wait after page load
|
||||||
|
categories: list[str] = [] # empty = test all categories
|
||||||
|
|
||||||
|
|
||||||
class ScanResponse(BaseModel):
|
class ScanResponse(BaseModel):
|
||||||
@@ -59,7 +60,7 @@ async def health():
|
|||||||
async def scan_consent(req: ScanRequest):
|
async def scan_consent(req: ScanRequest):
|
||||||
"""Run 3-phase consent test on a URL."""
|
"""Run 3-phase consent test on a URL."""
|
||||||
logger.info("Starting consent test for %s", req.url)
|
logger.info("Starting consent test for %s", req.url)
|
||||||
result = await run_consent_test(req.url, req.timeout_per_phase)
|
result = await run_consent_test(req.url, req.timeout_per_phase, req.categories)
|
||||||
|
|
||||||
# Build raw response dict for structured check mapping
|
# Build raw response dict for structured check mapping
|
||||||
phases = {
|
phases = {
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
fastapi==0.115.12
|
fastapi==0.115.12
|
||||||
uvicorn==0.34.2
|
uvicorn==0.34.2
|
||||||
playwright==1.52.0
|
playwright==1.52.0
|
||||||
|
playwright-stealth==1.0.6
|
||||||
pydantic>=2.0
|
pydantic>=2.0
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
"""
|
"""
|
||||||
Banner Detector — identifies Consent Management Platforms and their buttons.
|
Banner Detector — identifies Consent Management Platforms and their buttons.
|
||||||
|
|
||||||
Supports 10+ CMPs with specific selectors + generic fallback.
|
Supports 30 CMPs with specific selectors + generic fallback + Shadow DOM.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from playwright.async_api import Page, Locator
|
from playwright.async_api import Page
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -79,6 +79,127 @@ CMP_SELECTORS = [
|
|||||||
"accept": "#tarteaucitronPersonalize2",
|
"accept": "#tarteaucitronPersonalize2",
|
||||||
"reject": "#tarteaucitronAllDenied2",
|
"reject": "#tarteaucitronAllDenied2",
|
||||||
},
|
},
|
||||||
|
# --- 20 additional CMPs ---
|
||||||
|
{
|
||||||
|
"name": "Sourcepoint",
|
||||||
|
"detect": "div[id^='sp_message']",
|
||||||
|
"accept": ".sp_choice_type_11",
|
||||||
|
"reject": ".sp_choice_type_13",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Axeptio",
|
||||||
|
"detect": "#axeptio_widget",
|
||||||
|
"accept": "[data-ax='accept']",
|
||||||
|
"reject": "[data-ax='decline']",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Iubenda",
|
||||||
|
"detect": "#iubenda-cs-banner",
|
||||||
|
"accept": ".iubenda-cs-accept-btn",
|
||||||
|
"reject": ".iubenda-cs-reject-btn",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Termly",
|
||||||
|
"detect": "#termly-code-snippet-support",
|
||||||
|
"accept": "[data-tid='banner-accept']",
|
||||||
|
"reject": "[data-tid='banner-decline']",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "CookieFirst",
|
||||||
|
"detect": "#cookiefirst-root",
|
||||||
|
"accept": "[data-cookiefirst-action='accept']",
|
||||||
|
"reject": "[data-cookiefirst-action='reject']",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Complianz",
|
||||||
|
"detect": "#cmplz-cookiebanner-container",
|
||||||
|
"accept": ".cmplz-accept",
|
||||||
|
"reject": ".cmplz-deny",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "CookieScript",
|
||||||
|
"detect": "#cookiescript_injected",
|
||||||
|
"accept": "#cookiescript_accept",
|
||||||
|
"reject": "#cookiescript_reject",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "HubSpot",
|
||||||
|
"detect": "#hs-eu-cookie-confirmation",
|
||||||
|
"accept": "#hs-eu-confirmation-button",
|
||||||
|
"reject": "#hs-eu-decline-button",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Civic UK",
|
||||||
|
"detect": "#ccc, .ccc-content",
|
||||||
|
"accept": "#ccc-recommended-settings",
|
||||||
|
"reject": "#ccc-reject-settings",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "GDPR Cookie Compliance",
|
||||||
|
"detect": "#moove_gdpr_cookie_modal",
|
||||||
|
"accept": ".moove-gdpr-modal-allow-all",
|
||||||
|
"reject": ".moove-gdpr-modal-save-settings",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "CookieHub",
|
||||||
|
"detect": "#ch2-container",
|
||||||
|
"accept": "#ch2-btn-accept",
|
||||||
|
"reject": "#ch2-btn-decline",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Osano",
|
||||||
|
"detect": ".osano-cm-dialog",
|
||||||
|
"accept": ".osano-cm-accept-all",
|
||||||
|
"reject": ".osano-cm-deny",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Ketch",
|
||||||
|
"detect": "#ketch-consent",
|
||||||
|
"accept": "[data-testid='accept-button']",
|
||||||
|
"reject": "[data-testid='decline-button']",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Piwik PRO",
|
||||||
|
"detect": "#ppms_cm_popup_overlay",
|
||||||
|
"accept": "#ppms_cm_agree-to-all",
|
||||||
|
"reject": "#ppms_cm_reject-all",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Cookie Consent (Insites)",
|
||||||
|
"detect": ".cc-window",
|
||||||
|
"accept": ".cc-btn.cc-allow",
|
||||||
|
"reject": ".cc-btn.cc-deny",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Admiral",
|
||||||
|
"detect": "[id^='admiral-']",
|
||||||
|
"accept": "[class*='admiral-accept']",
|
||||||
|
"reject": "[class*='admiral-reject']",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Sibbo",
|
||||||
|
"detect": "#sibbo-cmp-layout",
|
||||||
|
"accept": "#sibbo-cmp-accept-all",
|
||||||
|
"reject": "#sibbo-cmp-reject-all",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Evidon",
|
||||||
|
"detect": "#_evidon_banner",
|
||||||
|
"accept": "#_evidon-accept-button",
|
||||||
|
"reject": "#_evidon-decline-button",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "LiveRamp",
|
||||||
|
"detect": "#_lr-cookie-consent",
|
||||||
|
"accept": "#_lr-accept-all",
|
||||||
|
"reject": "#_lr-reject-all",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Adsimple",
|
||||||
|
"detect": "#adconsent-usp-banner",
|
||||||
|
"accept": ".adconsent-accept-all",
|
||||||
|
"reject": ".adconsent-reject-all",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
# Generic fallback patterns (text-based)
|
# Generic fallback patterns (text-based)
|
||||||
@@ -94,45 +215,245 @@ GENERIC_REJECT_TEXTS = [
|
|||||||
"Decline", "Nein", "Nicht einverstanden",
|
"Decline", "Nein", "Nicht einverstanden",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Attribute-based generic selectors for consent buttons.
_GENERIC_ATTR_ACCEPT = [
    "[data-consent='accept']", "[data-cookie='accept']", "[data-gdpr='accept']",
    "[data-consent-accept]", "[data-cookie-accept]",
]
_GENERIC_ATTR_REJECT = [
    "[data-consent='reject']", "[data-cookie='reject']", "[data-gdpr='reject']",
    "[data-consent-reject]", "[data-cookie-reject]",
]

# Dialog / aria selectors used to find consent containers.
# The trailing ` i` flag makes the attribute match case-insensitive, so a single
# entry per keyword is enough (a separate 'Cookie' variant would be a duplicate).
_DIALOG_SELECTORS = [
    "[role='dialog']",
    "[aria-label*='cookie' i]", "[aria-label*='consent' i]",
    "[aria-label*='datenschutz' i]",
]
|
||||||
|
|
||||||
|
# JavaScript for recursive Shadow DOM search
|
||||||
|
# Evaluated in the page. Walks every open shadow root reachable from the
# document and returns {host, buttons, preview} for the first root whose markup
# mentions a consent keyword and that contains at least one short button label;
# returns null when no such root exists.
_SHADOW_DETECT_JS = """
() => {
  const KEYWORDS = /cookie|consent|datenschutz|privacy/i;
  const results = [];
  function walk(root) {
    for (const el of root.querySelectorAll('*')) {
      if (el.shadowRoot) {
        const shadow = el.shadowRoot;
        const text = shadow.innerHTML || '';
        if (KEYWORDS.test(text)) {
          const buttons = [];
          for (const btn of shadow.querySelectorAll(
            'button, a[role="button"], [role="button"]'
          )) {
            const t = (btn.textContent || '').trim();
            if (t.length > 0 && t.length < 80) {
              buttons.push(t);
            }
          }
          if (buttons.length > 0) {
            const tag = el.tagName.toLowerCase();
            const id = el.id ? '#' + el.id : '';
            results.push({
              host: tag + id,
              buttons: buttons,
              preview: text.substring(0, 200)
            });
          }
        }
        walk(shadow);
      }
    }
  }
  walk(document);
  return results.length > 0 ? results[0] : null;
}
"""
|
||||||
|
|
||||||
|
# Evaluated in the page with a single argument: the visible label of the button
# to click. Walks every open shadow root reachable from the document, clicks the
# first button whose text contains the label, and returns true; returns false
# when nothing matched.
#
# The label is matched literally (case-insensitive substring). Callers pass raw
# button text, which may contain characters such as "(", ")", "+", "?" or "[";
# compiling it as a regular expression would mis-match or throw on those.
_SHADOW_CLICK_JS = """
(textPattern) => {
  const needle = String(textPattern || '').trim().toLowerCase();
  // An empty label would match every button; refuse instead of clicking blindly.
  if (!needle) return false;
  function walk(root) {
    for (const el of root.querySelectorAll('*')) {
      if (el.shadowRoot) {
        const btns = el.shadowRoot.querySelectorAll(
          'button, a[role="button"], [role="button"]'
        );
        for (const btn of btns) {
          if ((btn.textContent || '').toLowerCase().includes(needle)) {
            btn.click();
            return true;
          }
        }
        const found = walk(el.shadowRoot);
        if (found) return true;
      }
    }
    return false;
  }
  return walk(document);
}
"""
|
||||||
|
|
||||||
|
|
||||||
|
async def _detect_in_shadow_dom(page: Page) -> BannerInfo | None:
    """Search Shadow DOM roots for consent banners as last-resort fallback.

    Runs ``_SHADOW_DETECT_JS`` in the page and classifies the returned button
    labels by keyword.

    Args:
        page: Page to inspect.

    Returns:
        A ``BannerInfo`` whose selectors use the ``shadow-click:<label>`` form
        (empty string for a side that was not found), or ``None`` when the
        script found nothing, no label matched a keyword, or evaluation failed.
    """
    accept_kw = ("accept", "akzeptieren", "zustimmen", "agree", "allow",
                 "einverstanden", "alle")
    reject_kw = ("reject", "ablehnen", "deny", "decline", "refuse",
                 "notwendig", "necessary", "essential")
    try:
        result = await page.evaluate(_SHADOW_DETECT_JS)
        if not result:
            return None
        buttons = result.get("buttons") or []
        host = result.get("host", "")
        accept_pat = ""
        reject_pat = ""
        for text in buttons:
            low = text.lower()
            # Reject keywords win: labels such as "Alle ablehnen" or
            # "Nur notwendige akzeptieren" also contain an accept keyword
            # ("alle", "akzeptieren") but are reject buttons.
            if any(k in low for k in reject_kw):
                if not reject_pat:
                    reject_pat = text
            elif any(k in low for k in accept_kw):
                if not accept_pat:
                    accept_pat = text
        if not accept_pat and not reject_pat:
            return None
        return BannerInfo(
            detected=True,
            provider=f"ShadowDOM({host})",
            accept_selector=f"shadow-click:{accept_pat}" if accept_pat else "",
            reject_selector=f"shadow-click:{reject_pat}" if reject_pat else "",
        )
    except Exception:
        # Best-effort fallback: a failed evaluation means "no banner found".
        return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _click_in_shadow_dom(page: Page, text_pattern: str) -> bool:
    """Click a Shadow DOM button whose label matches ``text_pattern``.

    Delegates to ``_SHADOW_CLICK_JS`` evaluated in the page and returns its
    result; any evaluation error is reported as ``False``.
    """
    try:
        clicked = await page.evaluate(_SHADOW_CLICK_JS, text_pattern)
    except Exception:
        # Best-effort: a failed evaluation counts as "nothing clicked".
        return False
    return clicked
|
||||||
|
|
||||||
|
|
||||||
|
async def _detect_generic_dialog(page: Page) -> BannerInfo | None:
    """Look for a consent banner inside dialog / aria-labelled containers.

    For each selector in ``_DIALOG_SELECTORS`` the first matching container is
    inspected. If its text mentions a consent keyword, accept/reject buttons are
    looked up first by data attribute, then by visible text. Returns a
    ``BannerInfo`` with provider ``"Generic (dialog)"`` as soon as either button
    is found, otherwise ``None``. Errors for one selector skip to the next.
    """
    keywords = ("cookie", "consent", "datenschutz", "privacy")

    async def _first_hit(box, candidates, by_text):
        # Return the first candidate present inside ``box``, or None.
        for cand in candidates:
            if by_text:
                probe = box.get_by_text(cand, exact=False)
            else:
                probe = box.locator(cand)
            if await probe.count() > 0:
                return cand
        return None

    for sel in _DIALOG_SELECTORS:
        try:
            matches = page.locator(sel)
            if await matches.count() == 0:
                continue
            box = matches.first
            body = (await box.inner_text(timeout=2000)).lower()
            if all(kw not in body for kw in keywords):
                continue

            # Consent dialog found: attribute-based lookup first.
            hit = await _first_hit(box, _GENERIC_ATTR_ACCEPT, False)
            accept = f"{sel} {hit}" if hit is not None else ""
            hit = await _first_hit(box, _GENERIC_ATTR_REJECT, False)
            reject = f"{sel} {hit}" if hit is not None else ""

            # Text-based lookup only for the side still missing.
            if not accept:
                hit = await _first_hit(box, GENERIC_ACCEPT_TEXTS, True)
                if hit is not None:
                    accept = f'{sel} button:has-text("{hit}")'
            if not reject:
                hit = await _first_hit(box, GENERIC_REJECT_TEXTS, True)
                if hit is not None:
                    reject = f'{sel} button:has-text("{hit}")'

            if accept or reject:
                return BannerInfo(
                    detected=True, provider="Generic (dialog)",
                    accept_selector=accept, reject_selector=reject,
                )
        except Exception:
            continue
    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _detect_generic_attr(page: Page) -> BannerInfo | None:
    """Find consent buttons via data-consent / data-cookie / data-gdpr attributes.

    Returns a ``BannerInfo`` with provider ``"Generic (attr)"`` when an accept
    or reject selector matches anywhere on the page, otherwise ``None``.
    """

    async def _first_present(selectors):
        # First selector with at least one match; a failing probe is skipped.
        for candidate in selectors:
            try:
                present = await page.locator(candidate).count() > 0
            except Exception:
                continue
            if present:
                return candidate
        return ""

    accept = await _first_present(_GENERIC_ATTR_ACCEPT)
    reject = await _first_present(_GENERIC_ATTR_REJECT)
    if not (accept or reject):
        return None
    return BannerInfo(
        detected=True, provider="Generic (attr)",
        accept_selector=accept, reject_selector=reject,
    )
|
||||||
|
|
||||||
|
|
||||||
async def detect_banner(page: Page) -> BannerInfo:
    """Identify the consent banner on ``page`` and return its button selectors.

    Strategies are tried in order and the first hit wins:
      1. CMP-specific selectors from ``CMP_SELECTORS``
      2. visible accept/reject texts anywhere on the page
      3. dialog / aria containers mentioning consent keywords
      4. data-consent / data-cookie / data-gdpr attributes
      5. buttons inside shadow roots
    When nothing matches, a ``BannerInfo`` with ``detected=False`` is returned.
    """
    # 1. Known CMPs.
    for entry in CMP_SELECTORS:
        try:
            if await page.locator(entry["detect"]).count() == 0:
                continue
            return BannerInfo(
                detected=True, provider=entry["name"],
                accept_selector=entry["accept"],
                reject_selector=entry["reject"],
            )
        except Exception:
            continue

    # 2. Generic text match; an error while probing skips to the next accept text.
    for accept_text in GENERIC_ACCEPT_TEXTS:
        try:
            if await page.get_by_text(accept_text, exact=False).count() == 0:
                continue
            reject_sel = ""
            for reject_text in GENERIC_REJECT_TEXTS:
                if await page.get_by_text(reject_text, exact=False).count() > 0:
                    reject_sel = f'button:has-text("{reject_text}")'
                    break
            return BannerInfo(
                detected=True, provider="Generic",
                accept_selector=f'button:has-text("{accept_text}")',
                reject_selector=reject_sel,
            )
        except Exception:
            continue

    # 3.-5. Remaining fallbacks: dialog containers, data attributes, Shadow DOM.
    for fallback in (_detect_generic_dialog, _detect_generic_attr, _detect_in_shadow_dom):
        found = await fallback(page)
        if found:
            return found

    return BannerInfo(detected=False, provider="", accept_selector="", reject_selector="")
|
||||||
|
|
||||||
|
|
||||||
async def click_button(page: Page, selector: str, timeout: int = 5000) -> bool:
    """Try to click a consent button. Returns True if clicked successfully.

    Args:
        page: Page containing the banner.
        selector: Either a regular selector or ``shadow-click:<label>``, which
            is routed to the Shadow DOM click helper.
        timeout: Milliseconds to wait for the element to become visible.

    Returns:
        True when a click was performed; False for an empty selector or when
        neither the regular click nor the Shadow DOM fallback succeeded.
    """
    if not selector:
        return False

    # Handle Shadow DOM selectors.
    shadow_prefix = "shadow-click:"
    if selector.startswith(shadow_prefix):
        return await _click_in_shadow_dom(page, selector[len(shadow_prefix):])

    try:
        locator = page.locator(selector).first
        await locator.wait_for(state="visible", timeout=timeout)
        await locator.click()
        return True
    except Exception:
        # Fallback: retry inside shadow roots using the label taken from a
        # selector like 'button:has-text("Accept all")'.
        marker = ':has-text("'
        if marker in selector:
            # Drop exactly the closing '")'. rstrip('")') would treat the
            # argument as a character set and also eat a ')' or '"' that
            # belongs to the label itself, e.g. "Accept (all)".
            text = selector.split(marker, 1)[1].removesuffix('")')
            return await _click_in_shadow_dom(page, text)
        return False
|
||||||
|
|||||||
@@ -11,6 +11,12 @@ from dataclasses import dataclass, field
|
|||||||
|
|
||||||
from playwright.async_api import async_playwright, Page, BrowserContext
|
from playwright.async_api import async_playwright, Page, BrowserContext
|
||||||
|
|
||||||
|
try:
|
||||||
|
from playwright_stealth import stealth_async
|
||||||
|
HAS_STEALTH = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_STEALTH = False
|
||||||
|
|
||||||
from services.banner_detector import detect_banner, click_button, BannerInfo
|
from services.banner_detector import detect_banner, click_button, BannerInfo
|
||||||
from services.script_analyzer import (
|
from services.script_analyzer import (
|
||||||
classify_scripts, find_tracking_services,
|
classify_scripts, find_tracking_services,
|
||||||
@@ -53,22 +59,43 @@ class ConsentTestResult:
|
|||||||
banner_has_dse_link: bool = False
|
banner_has_dse_link: bool = False
|
||||||
|
|
||||||
|
|
||||||
async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
|
async def run_consent_test(
|
||||||
"""Run 3-phase consent test on a URL."""
|
url: str, wait_secs: int = 10, categories: list[str] | None = None,
|
||||||
|
) -> ConsentTestResult:
|
||||||
|
"""Run 3-phase consent test on a URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: Website URL to test.
|
||||||
|
wait_secs: Seconds to wait per phase.
|
||||||
|
categories: Optional list of category names to test (empty = test all).
|
||||||
|
"""
|
||||||
result = ConsentTestResult()
|
result = ConsentTestResult()
|
||||||
wait_ms = wait_secs * 1000
|
wait_ms = wait_secs * 1000
|
||||||
|
filter_cats = categories or []
|
||||||
|
|
||||||
async with async_playwright() as p:
|
async with async_playwright() as p:
|
||||||
browser = await p.chromium.launch(
|
browser = await p.chromium.launch(
|
||||||
headless=True,
|
headless=True,
|
||||||
args=["--no-sandbox", "--disable-dev-shm-usage"],
|
args=[
|
||||||
|
"--no-sandbox",
|
||||||
|
"--disable-dev-shm-usage",
|
||||||
|
"--disable-blink-features=AutomationControlled",
|
||||||
|
"--window-size=1920,1080",
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# ── Phase A: Before consent ──────────────────────────
|
# ── Phase A: Before consent ──────────────────────────
|
||||||
logger.info("Phase A: First visit (no interaction)")
|
logger.info("Phase A: First visit (no interaction)")
|
||||||
ctx_a = await browser.new_context(user_agent=USER_AGENT)
|
ctx_a = await browser.new_context(
|
||||||
|
user_agent=USER_AGENT,
|
||||||
|
viewport={"width": 1920, "height": 1080},
|
||||||
|
locale="de-DE",
|
||||||
|
timezone_id="Europe/Berlin",
|
||||||
|
)
|
||||||
page_a = await ctx_a.new_page()
|
page_a = await ctx_a.new_page()
|
||||||
|
if HAS_STEALTH:
|
||||||
|
await stealth_async(page_a)
|
||||||
scripts_a = []
|
scripts_a = []
|
||||||
page_a.on("request", lambda req: _collect_script(req, scripts_a))
|
page_a.on("request", lambda req: _collect_script(req, scripts_a))
|
||||||
|
|
||||||
@@ -101,8 +128,15 @@ async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
|
|||||||
|
|
||||||
# ── Phase B: After rejecting ─────────────────────────
|
# ── Phase B: After rejecting ─────────────────────────
|
||||||
logger.info("Phase B: Reject consent (%s)", banner.provider)
|
logger.info("Phase B: Reject consent (%s)", banner.provider)
|
||||||
ctx_b = await browser.new_context(user_agent=USER_AGENT)
|
ctx_b = await browser.new_context(
|
||||||
|
user_agent=USER_AGENT,
|
||||||
|
viewport={"width": 1920, "height": 1080},
|
||||||
|
locale="de-DE",
|
||||||
|
timezone_id="Europe/Berlin",
|
||||||
|
)
|
||||||
page_b = await ctx_b.new_page()
|
page_b = await ctx_b.new_page()
|
||||||
|
if HAS_STEALTH:
|
||||||
|
await stealth_async(page_b)
|
||||||
scripts_b = []
|
scripts_b = []
|
||||||
page_b.on("request", lambda req: _collect_script(req, scripts_b))
|
page_b.on("request", lambda req: _collect_script(req, scripts_b))
|
||||||
|
|
||||||
@@ -128,8 +162,15 @@ async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
|
|||||||
|
|
||||||
# ── Phase C: After accepting ─────────────────────────
|
# ── Phase C: After accepting ─────────────────────────
|
||||||
logger.info("Phase C: Accept consent (%s)", banner.provider)
|
logger.info("Phase C: Accept consent (%s)", banner.provider)
|
||||||
ctx_c = await browser.new_context(user_agent=USER_AGENT)
|
ctx_c = await browser.new_context(
|
||||||
|
user_agent=USER_AGENT,
|
||||||
|
viewport={"width": 1920, "height": 1080},
|
||||||
|
locale="de-DE",
|
||||||
|
timezone_id="Europe/Berlin",
|
||||||
|
)
|
||||||
page_c = await ctx_c.new_page()
|
page_c = await ctx_c.new_page()
|
||||||
|
if HAS_STEALTH:
|
||||||
|
await stealth_async(page_c)
|
||||||
scripts_c = []
|
scripts_c = []
|
||||||
page_c.on("request", lambda req: _collect_script(req, scripts_c))
|
page_c.on("request", lambda req: _collect_script(req, scripts_c))
|
||||||
|
|
||||||
@@ -154,18 +195,40 @@ async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
|
|||||||
try:
|
try:
|
||||||
from services.category_tester import detect_categories, test_single_category
|
from services.category_tester import detect_categories, test_single_category
|
||||||
|
|
||||||
ctx_cat = await browser.new_context(user_agent=USER_AGENT)
|
ctx_cat = await browser.new_context(
|
||||||
|
user_agent=USER_AGENT,
|
||||||
|
viewport={"width": 1920, "height": 1080},
|
||||||
|
locale="de-DE",
|
||||||
|
timezone_id="Europe/Berlin",
|
||||||
|
)
|
||||||
page_cat = await ctx_cat.new_page()
|
page_cat = await ctx_cat.new_page()
|
||||||
|
if HAS_STEALTH:
|
||||||
|
await stealth_async(page_cat)
|
||||||
await page_cat.goto(url, wait_until="networkidle", timeout=20000)
|
await page_cat.goto(url, wait_until="networkidle", timeout=20000)
|
||||||
await page_cat.wait_for_timeout(2000)
|
await page_cat.wait_for_timeout(2000)
|
||||||
|
|
||||||
categories = await detect_categories(page_cat, banner)
|
detected_cats = await detect_categories(page_cat, banner)
|
||||||
await page_cat.close()
|
await page_cat.close()
|
||||||
|
|
||||||
if categories:
|
# Filter to requested categories if specified
|
||||||
logger.info("Testing %d categories individually", len(categories))
|
if filter_cats and detected_cats:
|
||||||
for cat in categories:
|
detected_cats = [
|
||||||
cat_ctx = await browser.new_context(user_agent=USER_AGENT)
|
c for c in detected_cats if c.name in filter_cats
|
||||||
|
]
|
||||||
|
logger.info(
|
||||||
|
"Filtered to %d categories (requested: %s)",
|
||||||
|
len(detected_cats), filter_cats,
|
||||||
|
)
|
||||||
|
|
||||||
|
if detected_cats:
|
||||||
|
logger.info("Testing %d categories individually", len(detected_cats))
|
||||||
|
for cat in detected_cats:
|
||||||
|
cat_ctx = await browser.new_context(
|
||||||
|
user_agent=USER_AGENT,
|
||||||
|
viewport={"width": 1920, "height": 1080},
|
||||||
|
locale="de-DE",
|
||||||
|
timezone_id="Europe/Berlin",
|
||||||
|
)
|
||||||
cat_result = await test_single_category(cat_ctx, url, cat, banner, wait_ms)
|
cat_result = await test_single_category(cat_ctx, url, cat, banner, wait_ms)
|
||||||
result.category_tests.append(cat_result)
|
result.category_tests.append(cat_result)
|
||||||
await cat_ctx.close()
|
await cat_ctx.close()
|
||||||
|
|||||||
Reference in New Issue
Block a user