Column anomalies
How it works
Default Monitors by Data Type
Data quality metric
Column Type
Data quality metric
Column Type
Test configuration
Last updated
Was this helpful?
Last updated
Was this helpful?
Was this helpful?
# Template: attaching elementary.column_anomalies to a column of a dbt model.
# Text inside < ... > is a placeholder to be replaced with a real value.
models:
# Variant 1: the model declares a timestamp column in its elementary config.
- name: < model name >
  config:
    elementary:
      timestamp_column: < timestamp column >
  columns:
  - name: < column name >
    tests:
    - elementary.column_anomalies:
        column_anomalies: < specific monitors, all if null >
        where_expression: < sql expression >
        time_bucket:  # Daily by default
          period: < time period >
          count: < number of periods >
# Variant 2: no timestamp column is configured.
- name: < model name >
  ## if no timestamp is configured, elementary will monitor without time filtering
  columns:
  - name: < column name >
    tests:
    - elementary.column_anomalies:
        column_anomalies: < specific monitors, all if null >
        where_expression: < sql expression >

models:
# Worked example: entries of the `models:` list.
# login_events declares a model-level timestamp column.
- name: login_events
  config:
    elementary:
      timestamp_column: 'loaded_at'
  columns:
  - name: user_name
    tests:
    - elementary.column_anomalies:
        # Only these two monitors run for this column.
        column_anomalies:
        - missing_count
        - min_length
        where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'"
        time_bucket:
          period: day
          count: 1
        tags: ['elementary']
- name: users
  ## if no timestamp is configured, elementary will monitor without time filtering
  tests:
  # Each test is a list item; the trailing colon lets it carry arguments.
  - elementary.volume_anomalies:
      tags: ['elementary']
  columns:
  - name: user_id
    tests:
    - elementary.column_anomalies:
        tags: ['elementary']
        # Timestamp column given on the test itself; the model declares none.
        timestamp_column: 'updated_at'
        where_expression: "event_type in ('event_1', 'event_2') and country_name != 'unwanted country'"
        time_bucket:
          period: < time period >
          count: < number of periods >
  - name: user_name
    tests:
    - elementary.column_anomalies:
        column_anomalies:
        - missing_count
        - min_length
        tags: ['elementary']

tests:
# Configuration reference for elementary.column_anomalies.
# Values are type or option placeholders, not literals: `int` stands for an
# integer and `[a | b | c]` means "choose one of the listed options".
- elementary.column_anomalies:
    column_anomalies: column monitors list
    timestamp_column: column name
    where_expression: sql expression
    anomaly_sensitivity: int
    anomaly_direction: [both | spike | drop]
    # Each time window below is a period unit plus a count of that unit.
    detection_period:
      period: [hour | day | week | month]
      count: int
    training_period:
      period: [hour | day | week | month]
      count: int
    time_bucket:
      period: [hour | day | week | month]
      count: int
    seasonality: day_of_week
    detection_delay:
      period: [hour | day | week | month]
      count: int
    ignore_small_changes:
      spike_failure_percent_threshold: int
      drop_failure_percent_threshold: int
    anomaly_exclude_metrics: [SQL expression]