c13aa9183a
Extends Method C: for each unknown narrative token that pattern text names, suggest
the keyword_dictionary tag = the RequiredComponentTags shared by the naming
patterns (ranked by frequency, kept only when shared by >=40% of them, top 3).
Surfaces real dictionary gaps like "zwischenkreis" -> stored_energy and
"updates" -> has_software, which close coverage without hand-editing the dict.
Two precision fixes to Method C while here:
- patternsMentioning now matches WHOLE WORDS, not substrings — substring matching
flagged fragments like "stehen" inside "entstehen" and produced nonsensical
tag suggestions.
- a token is only proposed with a tag if one is shared by >=40% of its naming
patterns, so diffuse common verbs (spread across categories) drop out.
Wired into iace-audit propose -> audit-reports/vocab.{md,json}. Residual
common-verb noise is left to the human/LLM filter rather than a hand-grown
stopword list. Type 4 (coverage blind spots) + P3 (pin accepted proposals into a
GT case) remain for slice 6.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
89 lines
3.6 KiB
Go
89 lines
3.6 KiB
Go
package audit
|
|
|
|
// Stubs for Methods B-E. Each method is filled in in its own file as the
// audit suite grows. Keeping the type contracts here lets the CLI compile
// before each method has its full implementation.
|
|
|
|
// ============================================================================
// Method B — Component Self-Consistency
// ============================================================================
|
|
|
|
// CategoryGap pairs a category name with a list of suggested tags. It is the
// element type of ComponentResult.MissingForCategories.
//
// Category is serialized as "category" and SuggestedTags as "suggested_tags".
// NOTE(review): presumably SuggestedTags are the tags that would let a
// component cover Category — confirm against the Method B implementation.
type CategoryGap struct {
	Category      string   `json:"category"`
	SuggestedTags []string `json:"suggested_tags"`
}
|
|
|
|
type ComponentResult struct {
|
|
ComponentID string `json:"component_id"`
|
|
NameDE string `json:"name_de"`
|
|
DeclaredCategories []string `json:"declared_categories"`
|
|
CoveredCategories []string `json:"covered_categories"`
|
|
MissingForCategories []CategoryGap `json:"missing_for_categories,omitempty"`
|
|
}
|
|
|
|
type ConsistencyReport struct {
|
|
TotalComponents int `json:"total_components"`
|
|
Consistent int `json:"consistent"`
|
|
Incomplete int `json:"incomplete"`
|
|
IncompleteComponents []ComponentResult `json:"incomplete_components"`
|
|
}
|
|
|
|
// ============================================================================
// Method C — Limits-Form Vocabulary Diff
// ============================================================================
|
|
|
|
// DictionarySuggestion is one proposed keyword_dictionary entry produced by
// Method C (Limits-Form Vocabulary Diff). It is the element type of
// VocabularyReport.SuggestedDictionaryEntries.
//
// NOTE(review): presumably Token is the unknown token, Field the limits-form
// field it was found in, and PatternIDs the IDs of the patterns whose text
// names the token — confirm against the Method C implementation.
type DictionarySuggestion struct {
	Token      string   `json:"token"`
	Field      string   `json:"field"`
	PatternIDs []string `json:"pattern_ids"`

	// SuggestedTags are the RequiredComponentTags shared by the naming patterns,
	// ranked by frequency — the candidate tags a keyword_dictionary entry for this
	// token would emit so narratives mentioning it can trigger those patterns.
	// The field is omitted from the JSON output when empty.
	SuggestedTags []string `json:"suggested_tags,omitempty"`
}
|
|
|
|
type VocabularyReport struct {
|
|
UniqueTokens int `json:"unique_tokens"`
|
|
KnownTokens []string `json:"known_tokens"`
|
|
UnknownTokens []string `json:"unknown_tokens"`
|
|
SuggestedDictionaryEntries []DictionarySuggestion `json:"suggested_dictionary_entries"`
|
|
}
|
|
|
|
// ============================================================================
// Method D — Limits-Form Echo
// ============================================================================
|
|
|
|
// OrphanedPhrase is one entry of EchoReport.OrphanedPhrases in Method D
// (Limits-Form Echo): a field name, a phrase, and a floating-point score.
//
// NOTE(review): presumably Field is the limits-form field the phrase came
// from and BestScore the highest match score found for it, which was too low
// to count as echoed — confirm the scoring and threshold against the Method D
// implementation.
type OrphanedPhrase struct {
	Field     string  `json:"field"`
	Phrase    string  `json:"phrase"`
	BestScore float64 `json:"best_score"`
}
|
|
|
|
type EchoReport struct {
|
|
TotalPhrases int `json:"total_phrases"`
|
|
Echoed int `json:"echoed"`
|
|
Orphaned int `json:"orphaned"`
|
|
OrphanedPhrases []OrphanedPhrase `json:"orphaned_phrases"`
|
|
}
|
|
|
|
// ============================================================================
// Method E — Hierarchy Completeness
// ============================================================================
|
|
|
|
// HazardHierarchyResult is the per-hazard record used by Method E (Hierarchy
// Completeness). It is the element type of HierarchyReport.IncompleteHazards.
//
// Note that Levels is serialized under the JSON key "present_levels", not
// "levels"; MissingLevels is serialized as "missing_levels".
// NOTE(review): presumably the level names correspond to the design /
// protection / information counters on HierarchyReport — confirm the exact
// strings against the Method E implementation.
type HazardHierarchyResult struct {
	HazardID      string   `json:"hazard_id"`
	Name          string   `json:"name"`
	Category      string   `json:"category"`
	Levels        []string `json:"present_levels"`
	MissingLevels []string `json:"missing_levels"`
}
|
|
|
|
type HierarchyReport struct {
|
|
TotalHazards int `json:"total_hazards"`
|
|
Complete int `json:"complete"`
|
|
MissingDesign int `json:"missing_design"`
|
|
MissingProtection int `json:"missing_protection"`
|
|
MissingInfo int `json:"missing_information"`
|
|
IncompleteHazards []HazardHierarchyResult `json:"incomplete_hazards"`
|
|
}
|