I want to write an Airflow DAG-cleanup DAG, using https://github.com/teamclairvoyant/airflow-maintenance-dags for reference. I have written a cleanup DAG that takes all the DAGs and checks their schedule; if the schedule is @once, it deletes the DAG. When I try to run the cleanup DAG, it throws a SQLAlchemy error (screenshot attached). Please help me out with this error.
import airflow
from airflow import settings
from airflow.models import DAG, DagModel
from airflow.operators.python_operator import PythonOperator
from datetime import timedelta
import logging
import socket
DAG_ID = "AIRFLOW_CLEANUP_DAG"
START_DATE = airflow.utils.dates.days_ago(1)
SCHEDULE_INTERVAL = "@once"
DAG_OWNER_NAME = "PDM"
ALERT_EMAIL_ADDRESSES = []
ENABLE_DELETE = True
default_args = {
    "owner": DAG_OWNER_NAME,
    "depends_on_past": False,
    "email": ALERT_EMAIL_ADDRESSES,
    "email_on_failure": True,
    "email_on_retry": False,
    "start_date": START_DATE,
    "retries": 1,
    "retry_delay": timedelta(minutes=1),
}
dag = DAG(
    DAG_ID,
    default_args=default_args,
    schedule_interval=SCHEDULE_INTERVAL,
    start_date=START_DATE,
    catchup=False,
    description="Performing airflow cleanup",
)
if hasattr(dag, "doc_md"):
    dag.doc_md = __doc__
if hasattr(dag, "catchup"):
    dag.catchup = False
def cleanup_once_dags_fn(**context):
    logging.info("Starting to run cleanup process")
    try:
        host_name = socket.gethostname()
        host_ip = socket.gethostbyname(host_name)
        logging.info("Running on machine with host name: " + host_name)
        logging.info("Running on machine with IP: " + host_ip)
    except Exception as e:
        print("Unable to get host name and IP: " + str(e))
    session = settings.Session()
    logging.info("Configurations:")
    logging.info("enable_delete: " + str(ENABLE_DELETE))
    logging.info("session: " + str(session))
    logging.info("")
    dags = session.query(DagModel).all()
    entries_to_delete = []
    logging.info(f"Total DAGs :: {len(dags)}")
    for dag_model in dags:
        dag_schedule = dag_model.schedule_interval
        # only "@once" DAGs (and, here, one specific dag_id) are candidates for deletion
        if dag_schedule == "@once" and dag_model.dag_id == 'ba54206d-078c-42e8-a6b5-ad579e833364':
            entries_to_delete.append(dag_model)
    logging.info(f"DAGs with @once schedule: {len(entries_to_delete)}")
    logging.info("Process will be deleting the DAG(s) from the DB:")
    logging.info("Process will be deleting " + str(len(entries_to_delete)) + " DAG(s)")
    if ENABLE_DELETE:
        logging.info("Performing delete...")
        for entry in entries_to_delete:
            session.delete(entry)
        session.commit()
        logging.info("Finished performing delete")
    else:
        logging.warning("You've opted to skip deleting the DAG entries!!!")
    logging.info("Finished running cleanup process")
cleanup_once_dags = PythonOperator(
    task_id="cleanup_once_dags",
    python_callable=cleanup_once_dags_fn,
    provide_context=True,
    dag=dag,
)
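Before enabling deletes, it can help to dry-run the same query from a Python shell on the Airflow host and check what would actually be removed; a minimal sketch, reusing the DagModel and settings imports from the DAG above:
# Dry run: list the dag_ids whose stored schedule_interval is the "@once" preset
session = settings.Session()
once_dag_ids = [d.dag_id for d in session.query(DagModel).all() if d.schedule_interval == "@once"]
print(f"{len(once_dag_ids)} candidate DAG(s): {once_dag_ids}")
session.close()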
I ran into an error when invoking a Cloud Function using CloudFunctionInvokeFunctionOperator, like this:
line 915, in execute raise HttpError(resp, content, uri=self.uri) googleapiclient.errors.HttpError: <HttpError 404 when requesting https://cloudfunctions.googleapis.com/v1/projects/pongthorn/locations/asia-southeast1/functions/crypto-trading-to-bq:call?alt=json returned "Function crypto-trading-to-bq in region asia-southeast1 in project pongthorn does not exist". Details: "Function crypto-trading-to-bq in region asia-southeast1 in project pongthorn does not exist">
I assume I made a mistake with the function ID. What is the function ID? The figure below shows my Cloud Function, and the function name is crypto-trading-to-bq.
Are the function ID and the function name the same?
I set three variables in a JSON file and uploaded it to Airflow with the following values:
{
    "project_id": "pongthorn",
    "region_name": "asia-southeast1",
    "function_name": "crypto-trading-to-bq"
}
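A quick way to confirm the values landed under exactly these keys is to read them back from a Python shell on the Airflow worker; a minimal sketch, assuming the variables were imported as shown above:
from airflow.models import Variable

# Print each expected key, falling back to a placeholder if it was not imported
for key in ("project_id", "region_name", "function_name"):
    print(key, "=", Variable.get(key, default_var="<missing>"))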
This is my code
import datetime
import airflow
from airflow.providers.google.cloud.operators.functions import (
CloudFunctionDeleteFunctionOperator,
CloudFunctionDeployFunctionOperator,
CloudFunctionInvokeFunctionOperator,
)
YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)
XProjectID = airflow.models.Variable.get('project_id')
XRegion = airflow.models.Variable.get('region_name')
XFunction = airflow.models.Variable.get('function_name')
default_args = {
    'owner': 'Binance Trading Transaction',
    'depends_on_past': False,
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': YESTERDAY,
}
with airflow.DAG(
        'bn_trading_flow',
        catchup=False,
        default_args=default_args,
        schedule_interval=datetime.timedelta(days=1)) as dag:
    call_crypto_trading_to_bq = CloudFunctionInvokeFunctionOperator(
        task_id="load_crypto_trading_to_bq",
        location=XRegion,
        function_id=XFunction,
        input_data={},
    )
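For reference, CloudFunctionInvokeFunctionOperator also accepts an explicit project_id; when it is omitted, the project comes from the Google Cloud connection. A minimal sketch reusing the same Variables (not from the original post, and assuming the function really lives in that project and region):
call_crypto_trading_to_bq = CloudFunctionInvokeFunctionOperator(
    task_id="load_crypto_trading_to_bq",
    project_id=XProjectID,   # explicit project instead of the connection default
    location=XRegion,
    function_id=XFunction,   # the short function name, e.g. crypto-trading-to-bq
    input_data={},
)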
I have the following task:
this_is_a_task = SimpleHttpOperator(
    task_id='task_id',
    method='POST',
    http_conn_id='conn_id',
    endpoint='/?test=foo',
    # data={"test": "foo"},
    headers={"Content-Type": "application/json"}
)
On the Cloud Functions side, I'm trying to catch the parameters in the following two ways:
# catching the data
# test_data = request.get_json().get('test')
# print('test: {}'.format(test_data))
# catching the endpoint parameter
test_endpoint = request.args.get('test')
print('test: {}'.format(test_endpoint))
The second option works (request.args.get('test')); however, when trying the first option, request.get_json().get('test'), I get a 400 request error.
So if I'm not using the endpoint variable from my SimpleHttpOperator, how can I catch the JSON object passed into the data variable?
I've tried to replicate your issue, and based on this documentation you need to apply json.dumps to the body when calling a POST with JSON data, and then provide authentication credentials as a Google-generated ID token stored in an Authorization header.
See below sample code:
import datetime
import json
from airflow import models
from airflow.operators import bash
from airflow.providers.http.operators.http import SimpleHttpOperator
YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)
default_args = {
    'owner': 'Composer Example',
    'depends_on_past': False,
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': YESTERDAY,
}
with models.DAG(
        'composer_quickstart',
        catchup=False,
        default_args=default_args,
        schedule_interval=datetime.timedelta(days=1)) as dag:
    # Generate a Google ID token to use in the Authorization header
    gen_auth = bash.BashOperator(
        task_id='gen_auth', bash_command='gcloud auth print-identity-token '
    )
    auth_token = "{{ task_instance.xcom_pull(task_ids='gen_auth') }}"
    this_is_a_task = SimpleHttpOperator(
        task_id='task_id',
        method='POST',
        http_conn_id='cf_conn1',
        data=json.dumps({"test": "foo"}),
        headers={"Content-Type": "application/json", "Authorization": "Bearer " + auth_token}
    )
    gen_auth >> this_is_a_task
On the Cloud Functions side, I tried the sample code below:
test_data = request.get_json().get('test')
print(test_data)
return test_data
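If the function should also tolerate calls without a valid JSON body, Flask's request.get_json(silent=True) returns None instead of triggering the automatic 400; a minimal sketch (the handler name is illustrative):
def handler(request):
    # silent=True returns None on a missing/invalid JSON body instead of aborting with 400
    payload = request.get_json(silent=True) or {}
    test_data = payload.get("test")
    return str(test_data)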
You can also test your function using this curl command:
curl -i -X POST -H "Content-Type:application/json" -H "Authorization: bearer $(gcloud auth print-identity-token)" -d '{"test": "foo"}' https://function-5-k6ssrsqwma-uc.a.run.app
I have this code to connect to MySQL through an SSH tunnel, inside a Python class:
def executeQuery(self, query_string):
    print("connecting to database " + self.sql_main_database)
    with SSHTunnelForwarder(
        (self.ssh_host, self.ssh_port),
        ssh_username=self.ssh_user,
        ssh_pkey=self.pkey,
        remote_bind_address=(self.sql_hostname, self.sql_port)
    ) as tunnel:
        print("performing connection")
        conn = pymysql.connect(
            host="127.0.0.1",
            user=self.sql_username,
            password=self.sql_password,
            db=self.sql_main_database,
            port=tunnel.local_bind_port)
        query = query_string
        print("Querying")
        data = pd.read_sql_query(query, conn)
        print("Done!")
        conn.close()
        return data
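For completeness, a hedged usage sketch; the class and instance names below are illustrative, not from the original post:
# 'db' stands for an instance of the class that defines executeQuery
db = MySQLOverSSH()   # hypothetical constructor holding the SSH and MySQL settings
frame = db.executeQuery("SELECT COUNT(*) AS n FROM some_table")
print(frame.head())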
The code works well, but when the query is not well defined, the notebook freezes.
So I tried to use a try/except, and the code ended up like this:
def executeQuery(self, query_string):
    try:
        with SSHTunnelForwarder(
            (self.ssh_host, self.ssh_port),
            ssh_username=self.ssh_user,
            ssh_pkey=self.pkey,
            remote_bind_address=(self.sql_hostname, self.sql_port)
        ) as tunnel:
            try:
                conn = pymysql.connect(
                    host="127.0.0.1",
                    user=self.sql_username,
                    password=self.sql_password,
                    db=self.sql_main_database,
                    port=tunnel.local_bind_port
                )
                try:
                    query = query_string
                    data = pd.read_sql_query(query, conn)
                    return data
                except DatabaseError as e:
                    Log.debug(self, str(e))
                    raise DatabaseError
            except pymysql.err.InternalError as e:
                Log.debug(self, str(e))
                raise DataError
    except Exception as e:
        Log.debug(self, "[Error] Setting up database: '" + self.sql_main_database + "'")
        raise DataError
The issue is that pd.read_sql_query never returns, so the except is never reached: the try doesn't fail and the process just continues forever.
The timeout workaround is not possible, because the queries don't have defined execution times and some of them can stay in processing for a couple of hours.
I'm not sure how to fix it.
Indeed, the problem was not in the connector; updating the Jupyter version was all that was needed.
Suppose the following scenario using Zabbix 4.2. We have a core switch, two distributed switches and 20 access switches, where the distributed switches are connected to the core and 10 access switches are connected to each distributed switch. I am monitoring all of them using SNMP v2c and the official Cisco switch template. The problem is that I cannot easily define device dependencies in Zabbix. By easily, I mean that if a distributed switch goes down, I want the alarm for that device only, not for all the access switches connected to it. I could define it as follows: change the triggers for each device and make them dependent on the corresponding trigger of its distributed switch. However, this is too time consuming. What should I do? Any help is appreciated.
You are right, there isn't an easy way to set this kind of dependency.
I had to manage the same situation a while ago and I wrote a Python dependency setter which uses a "dependent hostgroup <--> master host" logic.
You can modify it to fit your needs (see masterTargetTriggerDescription and slaveTargetTriggerDescription for the dependency targets); it works but contains little error checking: use at your own risk!
import csv
import re
import json
from zabbix.api import ZabbixAPI
# Zabbix Server endpoint
zabbixServer = 'https://yourzabbix/zabbix/'
zabbixUser = 'admin'
zabbixPass = 'zabbix'
zapi = ZabbixAPI(url=zabbixServer, user=zabbixUser, password=zabbixPass)
# Hostgroup variables - to reference IDs while building API parameters
hostGroupNames = [] # list = array
hostGroupId = {} # dict = associative array
# Csv file for dep settings - see the format:
"""
Hostgroup;Master
ACCESS_1;DistSwitch1
ACCESS_2;DistSwitch1
ACCESS_5;DistSwitch2
ACCESS_6;DistSwitch2
DIST;CoreSwitch1
"""
fileName = 'dependancy.csv'
masterTargetTriggerDescription = '{HOST.NAME} is unavailable by ICMP'
slaveTargetTriggerDescription = '{HOST.NAME} is unavailable by ICMP|Zabbix agent on {HOST.NAME} is unreachable'
# Read CSV file
hostFile = open(fileName)
hostReader = csv.reader(hostFile, delimiter=';', quotechar='|')
hostData = list(hostReader)
# CSV Parsing
for line in hostData:
    hostgroupName = line[0]
    masterName = line[1]
    slaveIds = []
    masterId = zapi.get_id('host', item=masterName, with_id=False, hostid=None)
    hostGroupId = zapi.get_id('hostgroup', item=hostgroupName, with_id=False, hostid=None)
    masterTriggerObj = zapi.trigger.get(hostids=masterId, filter=({'description': masterTargetTriggerDescription}))
    print "Group: " + hostgroupName + " - ID: " + str(hostGroupId)
    print "Master host: " + masterName + " - ID: " + str(masterId)
    print "Master trigger: " + masterTriggerObj[0]['description'] + " - ID: " + str(masterTriggerObj[0]['triggerid'])
    # cycle through slave hosts
    hostGroupObj = zapi.hostgroup.get(groupids=hostGroupId, selectHosts='extend')
    for host in hostGroupObj[0]['hosts']:
        # exclude master
        if host['hostid'] != str(masterId):
            print " - Host Name: " + host['name'] + " - ID: " + host['hostid'] + " - MASTER: " + str(masterId)
            # cycle through all of the slave's triggers
            slaveTargetTriggerObj = zapi.trigger.get(hostids=host['hostid'])
            #print json.dumps(slaveTargetTriggerObj)
            for slaveTargetTrigger in slaveTargetTriggerObj:
                # search for dependency targets
                if re.search(slaveTargetTriggerDescription, slaveTargetTrigger['description'], re.IGNORECASE):
                    print " - Trigger: " + slaveTargetTrigger['description'] + " - ID: " + slaveTargetTrigger['triggerid']
                    # Clear existing dependencies from the trigger, then create the new one
                    clear = zapi.trigger.deletedependencies(triggerid=slaveTargetTrigger['triggerid'].encode())
                    result = zapi.trigger.adddependencies(triggerid=slaveTargetTrigger['triggerid'].encode(), dependsOnTriggerid=masterTriggerObj[0]['triggerid'])
    print "----------------------------------------"
    print ""
I updated the code contributed by Simone Zabberoni and rewrote it to work with Python 3, PyZabbix, and YAML.
#!/usr/bin/python3
import re
import yaml
#https://pypi.org/project/py-zabbix/
from pyzabbix import ZabbixAPI
# Zabbix Server endpoint
zabbix_server = 'https://zabbix.example.com/zabbix/'
zabbix_user = 'zbxuser'
zabbix_pass = 'zbxpassword'
# Create ZabbixAPI class instance
zapi = ZabbixAPI(zabbix_server)
# Enable HTTP auth
zapi.session.auth = (zabbix_user, zabbix_pass)
# Login (in case of HTTP Auth, only the username is needed, the password, if passed, will be ignored)
zapi.login(zabbix_user, zabbix_pass)
# Hostgroup variables - to reference IDs while building API parameters
hostGroupNames = [] # list = array
hostGroupId = {} # dict = associative array
# yaml file for dep settings - see the format:
"""
pvebar16 CTs:
master: pvebar16.example.com
masterTargetTriggerDescription: 'is unavailable by ICMP'
slaveTargetTriggerDescription: 'is unavailable by ICMP|Zabbix agent is unreachable for 5 minutes'
"""
fileName = 'dependancy.yml'
with open(fileName) as f:
    hostData = yaml.safe_load(f)
for groupyml in hostData.keys():
    masterTargetTriggerDescription = hostData[groupyml]['masterTargetTriggerDescription']
    slaveTargetTriggerDescription = hostData[groupyml]['slaveTargetTriggerDescription']
    masterName = hostData[groupyml]['master']
    hostgroupName = groupyml
    slaveIds = []
    masterId = zapi.host.get(filter={'host': masterName}, output=['hostid'])[0]['hostid']
    hostGroupId = zapi.hostgroup.get(filter={'name': hostgroupName}, output=['groupid'])[0]['groupid']
    masterTriggerObj = zapi.trigger.get(host=masterName, filter={'description': masterTargetTriggerDescription}, output=['triggerid', 'description'])
    print("Group: " + hostgroupName + " - ID: " + str(hostGroupId))
    print("Master host: " + masterName + " - ID: " + str(masterId))
    print("Master trigger: " + masterTriggerObj[0]['description'] + " - ID: " + str(masterTriggerObj[0]['triggerid']))
    # cycle through slave hosts
    hostGroupObj = zapi.hostgroup.get(groupids=hostGroupId, selectHosts='extend')
    for host in hostGroupObj[0]['hosts']:
        # exclude master
        if host['hostid'] != str(masterId):
            print(" - Host Name: " + host['name'] + " - ID: " + host['hostid'] + " - MASTER: " + str(masterId))
            # cycle through all of the slave's triggers
            slaveTargetTriggerObj = zapi.trigger.get(hostids=host['hostid'])
            for slaveTargetTrigger in slaveTargetTriggerObj:
                # search for dependency targets
                if re.search(slaveTargetTriggerDescription, slaveTargetTrigger['description'], re.IGNORECASE):
                    print(" - Trigger: " + slaveTargetTrigger['description'] + " - ID: " + slaveTargetTrigger['triggerid'])
                    # Clear existing dependencies from the trigger, then create the new one
                    clear = zapi.trigger.deletedependencies(triggerid=slaveTargetTrigger['triggerid'])
                    result = zapi.trigger.adddependencies(triggerid=slaveTargetTrigger['triggerid'], dependsOnTriggerid=masterTriggerObj[0]['triggerid'])
    print("----------------------------------------")
    print("")
Folks,
The following Python script is terminating with
job state = FAILED
and
Last State Change: Access denied checking streaming input path: s3n://elasticmapreduce/samples/wordcount/input/
Code:
import boto
import boto.emr
from boto.emr.step import StreamingStep
from boto.emr.bootstrap_action import BootstrapAction
import time
S3_BUCKET="mytesetbucket123asdf"
conn = boto.connect_emr()
step = StreamingStep(
    name='Wordcount',
    mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
    reducer='aggregate',
    input='s3n://elasticmapreduce/samples/wordcount/input/',
    output='s3n://' + S3_BUCKET + '/wordcount/output/2013-10-25')
jobid = conn.run_jobflow(
    name="test",
    log_uri="s3://" + S3_BUCKET + "/logs/",
    visible_to_all_users="True",
    steps=[step],
)
state = conn.describe_jobflow(jobid).state
print "job state = ", state
print "job id = ", jobid
while state != u'COMPLETED':
    print time.localtime()
    time.sleep(10)
    state = conn.describe_jobflow(jobid).state
    print conn.describe_jobflow(jobid)
    print "job state = ", state
    print "job id = ", jobid
print "final output can be found in s3://" + S3_BUCKET + "/output" + TIMESTAMP
print "try: $ s3cmd sync s3://" + S3_BUCKET + "/output" + TIMESTAMP + " ."
The problem is somewhere in boto... If we specify an IAM user instead of using roles, the job works perfectly. EMR supports IAM roles of course... and the IAM role we tested with has full rights to execute any task, so it's not a misconfiguration issue...
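For reference, boto 2's run_jobflow can also be pointed at the roles explicitly; a minimal sketch, assuming a boto version that supports the job_flow_role and service_role keyword arguments and that the default EMR roles already exist in the account:
jobid = conn.run_jobflow(
    name="test",
    log_uri="s3://" + S3_BUCKET + "/logs/",
    visible_to_all_users=True,
    job_flow_role="EMR_EC2_DefaultRole",  # instance profile assumed by the cluster's EC2 nodes
    service_role="EMR_DefaultRole",       # role assumed by the EMR service itself
    steps=[step],
)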