How to use the put_metric_alarm method in LocalStack

Best Python code snippet using localstack_python

addAlarm.py

Source:addAlarm.py Github

copy

Full Screen

...17 targetgroup_arn = action['TargetGroupArn']18 targetgroup = targetgroup_arn.split(':')[-1]19 targetgroup_list.append(targetgroup)20 print("[ActiveFlowCount, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 当链接数平均大于或等于 6000 就告警]")21 response = cloudwatch.put_metric_alarm(22 AlarmName='AWS_ELB_%s_ActiveFlowCount' % instance_name,23 AlarmDescription='Alarm when ELB ActiveFlowCount exceeds 6000',24 ActionsEnabled=True,25 OKActions=[sns_arn],26 AlarmActions=[sns_arn],27 MetricName='ActiveFlowCount',28 Namespace='AWS/ApplicationELB',29 Statistic='Average',30 Dimensions=[31 {32 'Name': 'LoadBalancer',33 'Value': '%s' % arn34 }35 ],36 Period=60,37 EvaluationPeriods=3,38 DatapointsToAlarm=3,39 Threshold=6000,40 ComparisonOperator='GreaterThanOrEqualToThreshold',41 TreatMissingData='notBreaching'42 )43 print(response)44 print("[NewFlowCount, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 当链接数平均大于或等于 1000 就告警]")45 response = cloudwatch.put_metric_alarm(46 AlarmName='AWS_ELB_%s_NewFlowCount' % instance_name,47 AlarmDescription='Alarm when ELB NewFlowCount exceeds 6000',48 ActionsEnabled=True,49 OKActions=[sns_arn],50 AlarmActions=[sns_arn],51 MetricName='NewFlowCount',52 Namespace='AWS/ApplicationELB',53 Statistic='Average',54 Dimensions=[55 {56 'Name': 'LoadBalancer',57 'Value': '%s' % arn58 }59 ],60 Period=60,61 EvaluationPeriods=3,62 DatapointsToAlarm=3,63 Threshold=1000,64 ComparisonOperator='GreaterThanOrEqualToThreshold',65 TreatMissingData='notBreaching'66 )67 print(response)68 print("[ProcessedBytes, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 当链接数平均大于或等于 5m 就告警]")69 response = cloudwatch.put_metric_alarm(70 AlarmName='AWS_ELB_%s_ProcessedBytes' % instance_name,71 AlarmDescription='Alarm when ELB ProcessedBytes exceeds 5m',72 ActionsEnabled=True,73 OKActions=[sns_arn],74 AlarmActions=[sns_arn],75 MetricName='ProcessedBytes',76 Namespace='AWS/ApplicationELB',77 Statistic='Average',78 Dimensions=[79 {80 'Name': 'LoadBalancer',81 'Value': 'app/%s' % arn82 }83 ],84 Period=60,85 EvaluationPeriods=3,86 
DatapointsToAlarm=3,87 Threshold=5000000,88 ComparisonOperator='GreaterThanOrEqualToThreshold',89 TreatMissingData='notBreaching'90 )91 print(response)92 for targetgroup in set(targetgroup_list):93 targetgroup_name = targetgroup.split('/')[1]94 print("[HealthyHostCount, 1分钟采集1次, 周期为1分钟, 1分钟有1个数据点超过阈值就告警, 健康主机的最大值小于等于 0 就告警]")95 response = cloudwatch.put_metric_alarm(96 AlarmName='AWS_ELB_%s_%s_HealthyHostCount' % (instance_name, targetgroup_name),97 AlarmDescription='Alarm when ELB HealthyHostCount less than 0',98 ActionsEnabled=True,99 OKActions=[sns_arn],100 AlarmActions=[sns_arn],101 MetricName='HealthyHostCount',102 Namespace='AWS/ApplicationELB',103 Statistic='Maximum',104 Dimensions=[105 {106 'Name': 'TargetGroup',107 'Value': '%s' % targetgroup108 },109 {110 'Name': 'LoadBalancer',111 'Value': '%s' % arn112 }113 ],114 Period=60,115 Unit='Percent',116 EvaluationPeriods=1,117 DatapointsToAlarm=1,118 Threshold=0,119 ComparisonOperator='LessThanOrEqualToThreshold',120 TreatMissingData='notBreaching'121 )122 print(response)123 print("[UnHealthyHostCount, 1分钟采集1次, 周期为1分钟, 1分钟有1个数据点超过阈值就告警, 当不健康主机数量大于或等于 1 个就告警]")124 response = cloudwatch.put_metric_alarm(125 AlarmName='AWS_ELB_%s_%s_UnHealthyHostCount' % (instance_name, targetgroup_name),126 AlarmDescription='Alarm when ELB UnHealthyHostCount exceeds 5m',127 ActionsEnabled=True,128 OKActions=[sns_arn],129 AlarmActions=[sns_arn],130 MetricName='UnHealthyHostCount',131 Namespace='AWS/ApplicationELB',132 Statistic='Average',133 Dimensions=[134 {135 'Name': 'TargetGroup',136 'Value': '%s' % targetgroup137 },138 {139 'Name': 'LoadBalancer',140 'Value': '%s' % arn141 }142 ],143 Period=60,144 EvaluationPeriods=1,145 DatapointsToAlarm=1,146 Threshold=1,147 ComparisonOperator='GreaterThanOrEqualToThreshold',148 TreatMissingData='notBreaching'149 )150 print(response)151def add_elb_http_alarm(instance_group, instance_arn, port=80, instance_name=None):152 print("[Add elb http %s alarm.]" % instance_group)153 
print("[HTTPCode_Target_5XX_Count, 1分钟采集1次, 周期为1分钟, 1分钟有1个数据点超过阈值就告警, 当5xx超过 10 个为超过阈值]")154 response = cloudwatch.put_metric_alarm(155 AlarmName='AWS_ELB_%s_%s_HTTPCode_Target_5XX_Count' % (instance_name, port),156 AlarmDescription='Alarm when ELB HTTPCode_Target_5XX_Count exceeds 10',157 ActionsEnabled=True,158 OKActions=[sns_arn],159 AlarmActions=[sns_arn],160 MetricName='HTTPCode_Target_5XX_Count',161 Namespace='AWS/ApplicationELB',162 Statistic='Sum',163 Dimensions=[164 {165 'Name': 'TargetGroup',166 'Value': 'targetgroup/%s' % instance_group167 },168 {169 'Name': 'LoadBalancer',170 'Value': 'app/%s' % instance_arn171 }172 ],173 Period=60,174 EvaluationPeriods=1,175 DatapointsToAlarm=1,176 Threshold=10,177 ComparisonOperator='GreaterThanOrEqualToThreshold',178 TreatMissingData='notBreaching'179 )180 print(response)181 print("[HTTPCode_Target_4XX_Count, 1分钟采集1次, 周期为1分钟, 5分钟有3个数据点超过阈值就告警, 当4xx超过 10% 为超过阈值]")182 response = cloudwatch.put_metric_alarm(183 AlarmName='AWS_ELB_%s_%s_HTTPCode_Target_4XX_Count' % (instance_name, port),184 AlarmDescription='Alarm when ELB HTTPCode_Target_4XX_Count exceeds 10',185 ActionsEnabled=True,186 OKActions=[sns_arn],187 AlarmActions=[sns_arn],188 MetricName='HTTPCode_Target_4XX_Count',189 Namespace='AWS/ApplicationELB',190 Statistic='Sum',191 Dimensions=[192 {193 'Name': 'TargetGroup',194 'Value': 'targetgroup/%s' % instance_group195 },196 {197 'Name': 'LoadBalancer',198 'Value': 'app/%s' % instance_arn199 }200 ],201 Period=60,202 EvaluationPeriods=5,203 DatapointsToAlarm=3,204 Threshold=10,205 ComparisonOperator='GreaterThanOrEqualToThreshold',206 TreatMissingData='notBreaching'207 )208 print(response)209def add_ec2_alarm(instance_id, instance_name=None):210 print("Add ec2 %s alarm." 
% instance_id)211 print("[CPUUtilization, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 80%]")212 response = cloudwatch.put_metric_alarm(213 AlarmName='AWS_EC2_%s_CPUUtilization' % (instance_name if instance_name else instance_id),214 AlarmDescription='Alarm when server CPU exceeds 80%',215 ActionsEnabled=True,216 OKActions=[sns_arn],217 AlarmActions=[sns_arn],218 MetricName='CPUUtilization',219 Namespace='AWS/EC2',220 Statistic='Average',221 Dimensions=[222 {223 'Name': 'InstanceId',224 'Value': '%s' % instance_id225 },226 ],227 Period=60,228 Unit='Percent',229 EvaluationPeriods=3,230 DatapointsToAlarm=3,231 Threshold=80.0,232 ComparisonOperator='GreaterThanOrEqualToThreshold',233 TreatMissingData='notBreaching'234 )235 print(response)236 print("[NetworkIn, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 5m]")237 response = cloudwatch.put_metric_alarm(238 AlarmName='AWS_EC2_%s_NetworkIn' % (instance_name if instance_name else instance_id),239 AlarmDescription='Alarm when server NetworkIn exceeds 5m',240 ActionsEnabled=True,241 OKActions=[sns_arn],242 AlarmActions=[sns_arn],243 MetricName='NetworkIn',244 Namespace='AWS/EC2',245 Statistic='Average',246 Dimensions=[247 {248 'Name': 'InstanceId',249 'Value': '%s' % instance_id250 },251 ],252 Period=60,253 EvaluationPeriods=3,254 DatapointsToAlarm=3,255 Threshold=5000000,256 ComparisonOperator='GreaterThanOrEqualToThreshold',257 TreatMissingData='notBreaching'258 )259 print(response)260 print("[NetworkOut, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 5m]")261 response = cloudwatch.put_metric_alarm(262 AlarmName='AWS_EC2_%s_NetworkOut' % (instance_name if instance_name else instance_id),263 AlarmDescription='Alarm when server NetworkOut exceeds 5m',264 ActionsEnabled=True,265 OKActions=[sns_arn],266 AlarmActions=[sns_arn],267 MetricName='NetworkOut',268 Namespace='AWS/EC2',269 Statistic='Average',270 Dimensions=[271 {272 'Name': 'InstanceId',273 'Value': '%s' % instance_id274 },275 ],276 Period=60,277 EvaluationPeriods=3,278 
DatapointsToAlarm=3,279 Threshold=5000000,280 ComparisonOperator='GreaterThanOrEqualToThreshold',281 TreatMissingData='notBreaching'282 )283 print(response)284 print("[StatusCheckFailed_System, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 5m]")285 response = cloudwatch.put_metric_alarm(286 AlarmName='AWS_EC2_%s_StatusCheckFailed_System' % (instance_name if instance_name else instance_id),287 AlarmDescription='Alarm when server NetworkOut Status Check Failed',288 ActionsEnabled=True,289 OKActions=[sns_arn],290 AlarmActions=[sns_arn],291 MetricName='StatusCheckFailed_System',292 Namespace='AWS/EC2',293 Statistic='Average',294 Dimensions=[295 {296 'Name': 'InstanceId',297 'Value': '%s' % instance_id298 },299 ],300 Period=60,301 EvaluationPeriods=3,302 DatapointsToAlarm=3,303 Threshold=1.0,304 ComparisonOperator='GreaterThanOrEqualToThreshold',305 TreatMissingData='notBreaching'306 )307 print(response)308def add_ec2_ebs_alarm(instance_id, instance_name=None):309 print("Add ec2 ebs %s alarm." % instance_id)310 ec2d = boto3.resource('ec2')311 instance = ec2d.Instance(instance_id)312 vol_id = instance.volumes.all()313 print(vol_id)314 for v in vol_id:315 print("[Found EBS volume %s on instance %s]" % (v.id, instance_id))316 print("[VolumeIdleTime, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 80%]")317 response = cloudwatch.put_metric_alarm(318 AlarmName='AWS_EC2_%s_EBS_%s_VolumeIdleTime' % (instance_name if instance_name else instance_id, v.id),319 AlarmDescription='Alarm when server CPU exceeds 80%',320 ActionsEnabled=True,321 OKActions=[sns_arn],322 AlarmActions=[sns_arn],323 MetricName='VolumeIdleTime',324 Namespace='AWS/EBS',325 Statistic='Average',326 Dimensions=[327 {328 'Name': 'VolumeId',329 'Value': '%s' % v.id330 },331 ],332 Period=60,333 EvaluationPeriods=3,334 DatapointsToAlarm=3,335 Threshold=30.0,336 ComparisonOperator='GreaterThanOrEqualToThreshold',337 TreatMissingData='notBreaching'338 )339 print(response)340def add_redis_alarm(instance_id, 
instance_name=None):341 print("[Add redis %s alarm.]" % instance_id)342 print("[CPUUtilization, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 80%]")343 response = cloudwatch.put_metric_alarm(344 AlarmName='AWS_REDIS_%s_CPUUtilization' % (instance_name if instance_name else instance_id),345 AlarmDescription='Alarm when redis CPU exceeds 80%',346 ActionsEnabled=True,347 OKActions=[sns_arn],348 AlarmActions=[sns_arn],349 MetricName='CPUUtilization',350 Namespace='AWS/ElastiCache',351 Statistic='Average',352 Dimensions=[353 {354 'Name': 'CacheClusterId',355 'Value': '%s' % instance_id356 },357 ],358 Period=60,359 Unit='Percent',360 EvaluationPeriods=3,361 DatapointsToAlarm=3,362 Threshold=80.0,363 ComparisonOperator='GreaterThanOrEqualToThreshold',364 TreatMissingData='notBreaching'365 )366 print(response)367 print("[EngineCPUUtilization, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 80%]")368 response = cloudwatch.put_metric_alarm(369 AlarmName='AWS_REDIS_%s_EngineCPUUtilization' % (instance_name if instance_name else instance_id),370 AlarmDescription='Alarm when redis Engine CPU exceeds 80%',371 ActionsEnabled=True,372 OKActions=[sns_arn],373 AlarmActions=[sns_arn],374 MetricName='EngineCPUUtilization',375 Namespace='AWS/ElastiCache',376 Statistic='Average',377 Dimensions=[378 {379 'Name': 'CacheClusterId',380 'Value': '%s' % instance_id381 },382 ],383 Period=60,384 Unit='Percent',385 EvaluationPeriods=3,386 DatapointsToAlarm=3,387 Threshold=80.0,388 ComparisonOperator='GreaterThanOrEqualToThreshold',389 TreatMissingData='notBreaching'390 )391 print(response)392 print("[CurrConnections, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 500]")393 response = cloudwatch.put_metric_alarm(394 AlarmName='AWS_REDIS_%s_CurrConnections' % (instance_name if instance_name else instance_id),395 AlarmDescription='Alarm when redis connections exceeds 500',396 ActionsEnabled=True,397 OKActions=[sns_arn],398 AlarmActions=[sns_arn],399 MetricName='CurrConnections',400 
Namespace='AWS/ElastiCache',401 Statistic='Average',402 Dimensions=[403 {404 'Name': 'CacheClusterId',405 'Value': '%s' % instance_id406 },407 ],408 Period=60,409 Unit='Percent',410 EvaluationPeriods=3,411 DatapointsToAlarm=3,412 Threshold=500,413 ComparisonOperator='GreaterThanOrEqualToThreshold',414 TreatMissingData='notBreaching'415 )416 print(response)417 print("[FreeableMemory, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值小于于或等于 1G]")418 response = cloudwatch.put_metric_alarm(419 AlarmName='AWS_REDIS_%s_FreeableMemory' % (instance_name if instance_name else instance_id),420 AlarmDescription='Alarm when redis FreeableMemory Less than 1G',421 ActionsEnabled=True,422 OKActions=[sns_arn],423 AlarmActions=[sns_arn],424 MetricName='FreeableMemory',425 Namespace='AWS/ElastiCache',426 Statistic='Average',427 Dimensions=[428 {429 'Name': 'CacheClusterId',430 'Value': '%s' % instance_id431 },432 ],433 Period=60,434 Unit='Percent',435 EvaluationPeriods=3,436 DatapointsToAlarm=3,437 Threshold=1000000000,438 ComparisonOperator='LessThanOrEqualToThreshold',439 TreatMissingData='notBreaching'440 )441 print(response)442 print("[NetworkBytesIn, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 5m]")443 response = cloudwatch.put_metric_alarm(444 AlarmName='AWS_REDIS_%s_NetworkBytesIn' % (instance_name if instance_name else instance_id),445 AlarmDescription='Alarm when redis NetworkBytesIn exceeds 5m',446 ActionsEnabled=True,447 OKActions=[sns_arn],448 AlarmActions=[sns_arn],449 MetricName='NetworkBytesIn',450 Namespace='AWS/ElastiCache',451 Statistic='Average',452 Dimensions=[453 {454 'Name': 'CacheClusterId',455 'Value': '%s' % instance_id456 },457 ],458 Period=60,459 Unit='Percent',460 EvaluationPeriods=3,461 DatapointsToAlarm=3,462 Threshold=5000000,463 ComparisonOperator='GreaterThanOrEqualToThreshold',464 TreatMissingData='notBreaching'465 )466 print(response)467 print("[NetworkBytesOut, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 5m]")468 response = cloudwatch.put_metric_alarm(469 
AlarmName='AWS_REDIS_%s_NetworkBytesOut' % (instance_name if instance_name else instance_id),470 AlarmDescription='Alarm when redis NetworkBytesOut exceeds 5m',471 ActionsEnabled=True,472 OKActions=[sns_arn],473 AlarmActions=[sns_arn],474 MetricName='NetworkBytesOut',475 Namespace='AWS/ElastiCache',476 Statistic='Average',477 Dimensions=[478 {479 'Name': 'CacheClusterId',480 'Value': '%s' % instance_id481 },482 ],483 Period=60,484 Unit='Percent',485 EvaluationPeriods=3,486 DatapointsToAlarm=3,487 Threshold=5000000,488 ComparisonOperator='GreaterThanOrEqualToThreshold',489 TreatMissingData='notBreaching'490 )491 print(response)492 print("[CacheMisses, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 5m]")493 response = cloudwatch.put_metric_alarm(494 AlarmName='AWS_REDIS_%s_CacheMisses' % (instance_name if instance_name else instance_id),495 AlarmDescription='Alarm when redis CacheMisses exceeds 5000',496 ActionsEnabled=True,497 OKActions=[sns_arn],498 AlarmActions=[sns_arn],499 MetricName='CacheMisses',500 Namespace='AWS/ElastiCache',501 Statistic='Average',502 Dimensions=[503 {504 'Name': 'CacheClusterId',505 'Value': '%s' % instance_id506 },507 ],508 Period=60,509 Unit='Percent',510 EvaluationPeriods=3,511 DatapointsToAlarm=3,512 Threshold=5000,513 ComparisonOperator='GreaterThanOrEqualToThreshold',514 TreatMissingData='notBreaching'515 )516 print(response)517def add_mysql_alarm(instance_id, instance_name=None):518 print("[Add mysql %s alarm.]" % instance_id)519 print("[CPUUtilization, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 5m]")520 response = cloudwatch.put_metric_alarm(521 AlarmName='AWS_MYSQL_%s_CPUUtilization' % (instance_name if instance_name else instance_id),522 AlarmDescription='Alarm when mysql CPUUtilization exceeds 80%',523 ActionsEnabled=True,524 OKActions=[sns_arn],525 AlarmActions=[sns_arn],526 MetricName='CPUUtilization',527 Namespace='AWS/RDS',528 Statistic='Average',529 Dimensions=[530 {531 'Name': 'DBInstanceIdentifier',532 'Value': '%s' % 
instance_id533 },534 ],535 Period=60,536 Unit='Percent',537 EvaluationPeriods=3,538 DatapointsToAlarm=3,539 Threshold=80.0,540 ComparisonOperator='GreaterThanOrEqualToThreshold',541 TreatMissingData='notBreaching'542 )543 print(response)544 print("[DatabaseConnections, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于或等于 500]")545 response = cloudwatch.put_metric_alarm(546 AlarmName='AWS_MYSQL_%s_DatabaseConnections' % (instance_name if instance_name else instance_id),547 AlarmDescription='Alarm when mysql DatabaseConnections exceeds 500',548 ActionsEnabled=True,549 OKActions=[sns_arn],550 AlarmActions=[sns_arn],551 MetricName='DatabaseConnections',552 Namespace='AWS/RDS',553 Statistic='Average',554 Dimensions=[555 {556 'Name': 'DBInstanceIdentifier',557 'Value': '%s' % instance_id558 },559 ],560 Period=60,561 Unit='Percent',562 EvaluationPeriods=3,563 DatapointsToAlarm=3,564 Threshold=500,565 ComparisonOperator='GreaterThanOrEqualToThreshold',566 TreatMissingData='notBreaching'567 )568 print(response)569 print("[FreeableMemory, 1分钟采集1次, 周期为3分钟, 3分钟有3个数据点超过阈值就告警, 平均值小于或等于 1g]")570 response = cloudwatch.put_metric_alarm(571 AlarmName='AWS_MYSQL_%s_FreeableMemory' % (instance_name if instance_name else instance_id),572 AlarmDescription='Alarm when mysql FreeableMemory less than 1g',573 ActionsEnabled=True,574 OKActions=[sns_arn],575 AlarmActions=[sns_arn],576 MetricName='FreeableMemory',577 Namespace='AWS/RDS',578 Statistic='Average',579 Dimensions=[580 {581 'Name': 'DBInstanceIdentifier',582 'Value': '%s' % instance_id583 },584 ],585 Period=60,586 Unit='Percent',587 EvaluationPeriods=3,588 DatapointsToAlarm=3,589 Threshold=1000000000,590 ComparisonOperator='LessThanOrEqualToThreshold',591 TreatMissingData='notBreaching'592 )593 print(response)594 print("[FreeStorageSpace, 5分钟采集1次, 周期为5分钟, 15分钟有3个数据点超过阈值就告警, 平均值小于于或等于 10g]")595 response = cloudwatch.put_metric_alarm(596 AlarmName='AWS_MYSQL_%s_FreeStorageSpace' % (instance_name if instance_name else instance_id),597 
AlarmDescription='Alarm when mysql FreeStorageSpace less than 10g',598 ActionsEnabled=True,599 OKActions=[sns_arn],600 AlarmActions=[sns_arn],601 MetricName='FreeableMemory',602 Namespace='AWS/RDS',603 Statistic='Average',604 Dimensions=[605 {606 'Name': 'DBInstanceIdentifier',607 'Value': '%s' % instance_id608 },609 ],610 Period=300,611 Unit='Percent',612 EvaluationPeriods=3,613 DatapointsToAlarm=3,614 Threshold=10000000000,615 ComparisonOperator='LessThanOrEqualToThreshold',616 TreatMissingData='notBreaching'617 )618 print(response)619 print("[NetworkTransmitThroughput, 1分钟采集1次, 周期为1分钟, 3分钟有3个数据点超过阈值就告警, 平均值大于于或等于5m就告警]")620 response = cloudwatch.put_metric_alarm(621 AlarmName='AWS_MYSQL_%s_NetworkTransmitThroughput' % (instance_name if instance_name else instance_id),622 AlarmDescription='Alarm when mysql NetworkTransmitThroughput exceeds 5m',623 ActionsEnabled=True,624 OKActions=[sns_arn],625 AlarmActions=[sns_arn],626 MetricName='NetworkTransmitThroughput',627 Namespace='AWS/RDS',628 Statistic='Average',629 Dimensions=[630 {631 'Name': 'DBInstanceIdentifier',632 'Value': '%s' % instance_id633 },634 ],635 Period=60,636 Unit='Percent',637 EvaluationPeriods=3,638 DatapointsToAlarm=3,639 Threshold=5000000,640 ComparisonOperator='GreaterThanOrEqualToThreshold',641 TreatMissingData='notBreaching'642 )643 print(response)644 print("[NetworkReceiveThroughput, 5分钟采集1次, 周期为5分钟, 15分钟有3个数据点超过阈值就告警, 平均值大于或等于 5m]")645 response = cloudwatch.put_metric_alarm(646 AlarmName='AWS_MYSQL_%s_NetworkReceiveThroughput' % (instance_name if instance_name else instance_id),647 AlarmDescription='Alarm when mysql NetworkReceiveThroughput exceeds 5m',648 ActionsEnabled=True,649 OKActions=[sns_arn],650 AlarmActions=[sns_arn],651 MetricName='NetworkReceiveThroughput',652 Namespace='AWS/RDS',653 Statistic='Average',654 Dimensions=[655 {656 'Name': 'DBInstanceIdentifier',657 'Value': '%s' % instance_id658 },659 ],...

Full Screen

Full Screen

opensearch_cloudwatch_alarms.py

Source:opensearch_cloudwatch_alarms.py Github

copy

Full Screen

...7# Create CloudWatch client8cloudwatch = boto3.client("cloudwatch")9def create_cloudwatch_alarms(INSTANCE_ID):10 # CLUSTER STATUS11 cloudwatch.put_metric_alarm(12 AlarmName="ClusterStatus-Red",13 AlarmDescription="At least one primary shard and its replicas are not allocated to a node.",14 ComparisonOperator="GreaterThanThreshold",15 Threshold=0,16 EvaluationPeriods=1,17 MetricName="ClusterStatus.red",18 Namespace="AWS/ES",19 Period=60,20 Statistic="Maximum",21 ActionsEnabled=False,22 Dimensions=[23 {24 "Name": "InstanceId",25 "Value": INSTANCE_ID,26 },27 ],28 Unit="Seconds",29 )30 cloudwatch.put_metric_alarm(31 AlarmName="ClusterStatus-Yellow",32 AlarmDescription="At least one replica shard is not allocated to a node.",33 ComparisonOperator="GreaterThanThreshold",34 Threshold=0,35 EvaluationPeriods=1,36 MetricName="ClusterStatus.yellow",37 Namespace="AWS/ES",38 Period=60,39 Statistic="Maximum",40 ActionsEnabled=False,41 Dimensions=[42 {43 "Name": "InstanceId",44 "Value": INSTANCE_ID,45 },46 ],47 Unit="Seconds",48 )49 # STORAGE SPACE50 # Based on needing 25% free.51 cloudwatch.put_metric_alarm(52 AlarmName="FreeStorageSpace",53 AlarmDescription="Alarm for lack of available storage space.",54 ComparisonOperator="LessThanThreshold",55 # TODO: get the disk size here and set to 25% of that.56 Threshold=20 * 1e9,57 EvaluationPeriods=1,58 MetricName="FreeStorageSpace",59 Namespace="AWS/ES",60 Period=60,61 Statistic="Average",62 ActionsEnabled=False,63 Dimensions=[64 {65 "Name": "InstanceId",66 "Value": INSTANCE_ID,67 },68 ],69 Unit="Seconds",70 )71 # WRITES72 cloudwatch.put_metric_alarm(73 AlarmName="ClusterIndexWritesBlocked",74 AlarmDescription="Alarm for cluster blocking write requests.",75 ComparisonOperator="GreaterThanThreshold",76 Threshold=0,77 EvaluationPeriods=1,78 MetricName="ClusterIndexWritesBlocked",79 Namespace="AWS/ES",80 Period=300,81 ActionsEnabled=False,82 Statistic="SampleCount",83 Dimensions=[84 {85 "Name": "InstanceId",86 "Value": INSTANCE_ID,87 
},88 ],89 Unit="Seconds",90 )91 # NODES AND SHARDS92 # All nodes are reachable.93 cloudwatch.put_metric_alarm(94 AlarmName="NodesNotReachable",95 AlarmDescription="Alarm for at least one node unavailable.",96 ComparisonOperator="LessThanThreshold",97 # TODO: get number of nodes for instance and add alarm based on that.98 Threshold=3,99 Statistic="Minimum",100 EvaluationPeriods=1,101 MetricName="Nodes",102 Namespace="AWS/ES",103 Period=86400,104 ActionsEnabled=False,105 Dimensions=[106 {107 "Name": "InstanceId",108 "Value": INSTANCE_ID,109 },110 ],111 Unit="Seconds",112 )113 cloudwatch.put_metric_alarm(114 AlarmName="AutomatedSnapshotFailure",115 AlarmDescription="Alarm for at least one node unavailable.",116 ComparisonOperator="GreaterThanThreshold",117 Threshold=0,118 Statistic="Maximum",119 EvaluationPeriods=1,120 MetricName="AutomatedSnapshotFailure",121 Namespace="AWS/ES",122 Period=60,123 ActionsEnabled=False,124 Dimensions=[125 {126 "Name": "InstanceId",127 "Value": INSTANCE_ID,128 },129 ],130 Unit="Seconds",131 )132 cloudwatch.put_metric_alarm(133 AlarmName="Shards-Active",134 AlarmDescription="Alarm for too many active primary and replica shards.",135 ComparisonOperator="GreaterThanOrEqualToThreshold",136 Threshold=30000,137 Statistic="SampleCount",138 EvaluationPeriods=1,139 MetricName="shards.active",140 Namespace="AWS/ES",141 Period=60,142 ActionsEnabled=False,143 Dimensions=[144 {145 "Name": "InstanceId",146 "Value": INSTANCE_ID,147 },148 ],149 Unit="Seconds",150 )151 # CPU and RAM utilisation152 cloudwatch.put_metric_alarm(153 AlarmName="CPUUtilization",154 AlarmDescription="Alarm for sustained high CPU usage.",155 ComparisonOperator="GreaterThanThreshold",156 Threshold=80,157 Statistic="Maximum",158 EvaluationPeriods=3,159 MetricName="CPUUtilization",160 Namespace="AWS/ES",161 Period=900,162 ActionsEnabled=False,163 Dimensions=[164 {165 "Name": "InstanceId",166 "Value": INSTANCE_ID,167 },168 ],169 Unit="Seconds",170 )171 
cloudwatch.put_metric_alarm(172 AlarmName="JVMMemoryPressure",173 AlarmDescription="Alarm for sustained high RAM usage.",174 ComparisonOperator="GreaterThanThreshold",175 Threshold=80,176 Statistic="Maximum",177 EvaluationPeriods=3,178 MetricName="JVMMemoryPressure",179 Namespace="AWS/ES",180 Period=300,181 ActionsEnabled=False,182 Dimensions=[183 {184 "Name": "InstanceId",185 "Value": INSTANCE_ID,186 },187 ],188 Unit="Seconds",189 )190 # Encryption keys191 cloudwatch.put_metric_alarm(192 AlarmName="KMSKeyError",193 AlarmDescription="Alarm for encryption key disabled.",194 ComparisonOperator="GreaterThanThreshold",195 Threshold=0,196 Statistic="SampleCount",197 EvaluationPeriods=1,198 MetricName="KMSKeyError",199 Namespace="AWS/ES",200 Period=60,201 ActionsEnabled=False,202 Dimensions=[203 {204 "Name": "InstanceId",205 "Value": INSTANCE_ID,206 },207 ],208 Unit="Seconds",209 )210 cloudwatch.put_metric_alarm(211 AlarmName="KMSKeyInaccessible",212 AlarmDescription="Alarm for encryption key deleted or grants revoked.",213 ComparisonOperator="GreaterThanThreshold",214 Threshold=0,215 Statistic="SampleCount",216 EvaluationPeriods=1,217 MetricName="KMSKeyInaccessible",218 Namespace="AWS/ES",219 Period=60,220 ActionsEnabled=False,221 Dimensions=[222 {223 "Name": "InstanceId",224 "Value": INSTANCE_ID,225 },226 ],227 Unit="Seconds",228 )229 # Search and Indexing concurrency230 cloudwatch.put_metric_alarm(231 AlarmName="ThreadpoolWriteQueue",232 AlarmDescription="Alarm for high average concurrency of indexing requests.",233 ComparisonOperator="GreaterThanOrEqualToThreshold",234 Threshold=100,235 Statistic="Average",236 EvaluationPeriods=1,237 MetricName="ThreadpoolWriteQueue",238 Namespace="AWS/ES",239 Period=60,240 ActionsEnabled=False,241 Dimensions=[242 {243 "Name": "InstanceId",244 "Value": INSTANCE_ID,245 },246 ],247 Unit="Seconds",248 )249 cloudwatch.put_metric_alarm(250 AlarmName="ThreadpoolSearchQueueAverage",251 AlarmDescription="Alarm for high average concurrency of 
search requests.",252 ComparisonOperator="GreaterThanOrEqualToThreshold",253 Threshold=500,254 Statistic="Average",255 EvaluationPeriods=1,256 MetricName="ThreadpoolSearchQueue",257 Namespace="AWS/ES",258 Period=60,259 ActionsEnabled=False,260 Dimensions=[261 {262 "Name": "InstanceId",263 "Value": INSTANCE_ID,264 },265 ],266 Unit="Seconds",267 )268 cloudwatch.put_metric_alarm(269 AlarmName="ThreadpoolSearchQueueMaximum",270 AlarmDescription="Alarm for high maximum concurrency of search requests.",271 ComparisonOperator="GreaterThanOrEqualToThreshold",272 Threshold=5000,273 Statistic="Maximum",274 EvaluationPeriods=1,275 MetricName="ThreadpoolSearchQueue",276 Namespace="AWS/ES",277 Period=60,278 ActionsEnabled=False,279 Dimensions=[280 {281 "Name": "InstanceId",282 "Value": INSTANCE_ID,...

Full Screen

Full Screen

lambda_function.py

Source:lambda_function.py Github

copy

Full Screen

...20 create_alarm_disk(instance_name,instanceid,metric['Dimensions'][0]['Value'] , metric['Dimensions'][2]['Value'], metric['Dimensions'][3]['Value'])21 if metric['MetricName'] == 'mem_used_percent':22 create_alarm_mem(instance_name, instanceid)23 # Create Alarm "CPU Utilization Greater than 95% for 10+ Minutes"24 cw.put_metric_alarm(25 AlarmName=instance_name+"-"+instanceid+" Utilização de CPU > 95%",26 AlarmDescription='Utilização de CPU > 95% por 10+ Minutos',27 ActionsEnabled=True,28 # OKActions=[29 # sns_pager,30 # sns_slack31 # ],32 # AlarmActions=[33 # sns_pager,34 # sns_slack35 # ],36 MetricName='CPUUtilization',37 Namespace='AWS/EC2',38 Statistic='Average',39 Dimensions=[40 {41 'Name': 'InstanceId',42 'Value': instanceid43 },44 ],45 Period=300,46 EvaluationPeriods=2,47 Threshold=95.0,48 ComparisonOperator='GreaterThanOrEqualToThreshold'49 )50 51 # Create Metric "Status Check Failed (System) for 5 Minutes"52 cw.put_metric_alarm(53 AlarmName=instance_name+"-"+instanceid+" System Check Failed",54 AlarmDescription='Status Check Failed (System) for 5 Minutes',55 ActionsEnabled=True,56 # OKActions=[57 # sns_pager,58 # sns_slack59 # ],60 # AlarmActions=[61 # sns_pager,62 # sns_slack63 # ],64 MetricName='StatusCheckFailed_System',65 Namespace='AWS/EC2',66 Statistic='Average',67 Dimensions=[68 {69 'Name': 'InstanceId',70 'Value': instanceid71 },72 ],73 Period=60,74 EvaluationPeriods=5,# tempo em minutos75 Threshold=1.0,76 ComparisonOperator='GreaterThanOrEqualToThreshold'77 )78 # Create Alarm "Status Check Failed (Instance) for 20 Minutes"79 cw.put_metric_alarm(80 AlarmName=instance_name+"-"+instanceid+" Instance Check Failed",81 AlarmDescription='Status Check Failed (Instance) for 20 Minutes',82 ActionsEnabled=True,83 # OKActions=[84 # sns_pager,85 # sns_slack86 # ],87 # AlarmActions=[88 # sns_pager,89 # sns_slack90 # ],91 MetricName='StatusCheckFailed_Instance',92 Namespace='AWS/EC2',93 Statistic='Average',94 Dimensions=[95 {96 'Name': 'InstanceId',97 'Value': 
instanceid98 },99 ],100 Period=60,101 EvaluationPeriods=20,# tempo em minutos102 Threshold=1.0,103 ComparisonOperator='GreaterThanOrEqualToThreshold'104 )105 #List All Devices of the Instance106 ec2d = boto3.resource('ec2', region_name= region)107 instance = ec2d.Instance(instanceid)108 vol_id = instance.volumes.all()109 devices = instance.block_device_mappings110 111 for v in vol_id:112 dev = [ dev['DeviceName'] for dev in devices if dev['Ebs']['VolumeId'] == v.id ]113 114 #Create Alarm device disk115 create_alarm_disk_dev(instanceid,v,dev)116def create_alarm_mem(instance_name, instanceid):117 # Create Alarm "MemoryUtilization Utilization More than 95% for 3+ Minutes"118 cw.put_metric_alarm(119 AlarmName=instance_name+"-"+instanceid+" Utilização de memória > 95%",120 AlarmDescription='Utilização de memória > 95% por 5+ Minutos',121 ActionsEnabled=True,122 # OKActions=[123 # sns_pager,124 # sns_slack125 # ],126 # AlarmActions=[127 # sns_pager,128 # sns_slack129 # ],130 MetricName='mem_used_percent',131 Namespace='CWAgent',132 Statistic='Average',133 Dimensions=[134 {135 'Name': 'InstanceId',136 'Value': instanceid137 },138 ],139 Period=60,140 EvaluationPeriods=5,# tempo em minutos141 Threshold=95,142 ComparisonOperator='GreaterThanOrEqualToThreshold'143 )144 145def get_instance_name(fid):146 # When given an instance ID as str e.g. 
'i-1234567', return the instance 'Name' from the name tag.147 ec2 = boto3.resource('ec2')148 ec2instance = ec2.Instance(fid)149 instancename = ''150 try:151 for tags in ec2instance.tags:152 if tags["Key"] == 'Name':153 instancename = tags["Value"]154 return instancename155 except Exception as e:156 return fid157def get_instances():158 # Retorna os ids das intancias 159 ec2 = boto3.resource('ec2')160 ec2instances = ec2.instances.all()161 ids = []162 for instance in ec2instances:163 if instance.state['Name'] == 'running':164 ids.append(instance.id)165 return ids166def create_alarm_disk(instance_name,instanceid,path,device,fstype):167 cw.put_metric_alarm(168 AlarmName=instance_name+"-"+instanceid+" Utilização de disco > 95% no "+path,169 AlarmDescription="Utilização de disco > 95% no "+path,170 ActionsEnabled=True,171 # OKActions=[172 # sns_pager,173 # sns_slack174 # ],175 # AlarmActions=[176 # sns_slack,177 # sns_pager178 # ],179 MetricName='disk_used_percent',180 Namespace='CWAgent',181 Statistic='Average',182 Dimensions=[183 {184 'Name': 'InstanceId',185 'Value': instanceid186 },187 {188 'Name': 'device',189 'Value': device190 },191 {192 'Name': 'fstype',193 'Value': fstype194 },195 {196 'Name': 'path',197 'Value': path198 },199 ],200 Period=60,201 EvaluationPeriods=1,202 Threshold=95,203 ComparisonOperator='GreaterThanThreshold'204 )205def create_alarm_disk_dev(instanceid,v,dev):206 # Create Alarm "Volume Idle Time < 0 sec (of 5 minutes) for 60 Minutes"207 cw.put_metric_alarm(208 AlarmName=v.id+" "+instanceid+" "+dev[0]+" High Volume Activity Critical",209 AlarmDescription='Volume Idle Time <= 30 sec (of 5 minutes) for 60 Minutes',210 ActionsEnabled=True,211 # OKActions=[212 # sns_pager,213 # sns_slack214 # ],215 # AlarmActions=[216 # sns_pager,217 # sns_slack218 # ],219 MetricName='VolumeIdleTime',220 Namespace='AWS/EBS',221 Statistic='Average',...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run localstack automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful