# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN!
#
# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE
# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
def get_provider_info():
return {
"package-name": "apache-airflow-providers-apache-spark",
"name": "Apache Spark",
"description": "`Apache Spark <https://spark.apache.org/>`__\n",
"integrations": [
{
"integration-name": "Apache Spark",
"external-doc-url": "https://spark.apache.org/",
"how-to-guide": ["/docs/apache-airflow-providers-apache-spark/operators.rst"],
"logo": "/docs/integration-logos/spark.png",
"tags": ["apache"],
}
],
"operators": [
{
"integration-name": "Apache Spark",
"python-modules": [
"airflow.providers.apache.spark.operators.spark_jdbc",
"airflow.providers.apache.spark.operators.spark_sql",
"airflow.providers.apache.spark.operators.spark_submit",
"airflow.providers.apache.spark.operators.spark_pyspark",
],
}
],
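# The operator modules above expose SparkJDBCOperator, SparkSqlOperator, and
# SparkSubmitOperator. A minimal sketch of submitting an application with
# SparkSubmitOperator; the application path and conn_id are illustrative
# values, not part of this metadata:
#
#     from airflow.providers.apache.spark.operators.spark_submit import SparkSubmitOperator
#
#     submit_job = SparkSubmitOperator(
#         task_id="submit_job",
#         application="/opt/spark/examples/src/main/python/pi.py",
#         conn_id="spark_default",  # a connection of type "spark", see connection-types below
#     )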
"hooks": [
{
"integration-name": "Apache Spark",
"python-modules": [
"airflow.providers.apache.spark.hooks.spark_connect",
"airflow.providers.apache.spark.hooks.spark_jdbc",
"airflow.providers.apache.spark.hooks.spark_jdbc_script",
"airflow.providers.apache.spark.hooks.spark_sql",
"airflow.providers.apache.spark.hooks.spark_submit",
],
}
],
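# The hooks can also be used directly, e.g. from a Python callable. A hedged
# sketch with SparkSubmitHook; conn_id and the application path are
# assumptions for illustration:
#
#     from airflow.providers.apache.spark.hooks.spark_submit import SparkSubmitHook
#
#     hook = SparkSubmitHook(conn_id="spark_default")
#     hook.submit(application="/opt/spark/examples/src/main/python/pi.py")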
"connection-types": [
{
"hook-class-name": "airflow.providers.apache.spark.hooks.spark_connect.SparkConnectHook",
"connection-type": "spark_connect",
"ui-field-behaviour": {
"hidden-fields": ["schema"],
"relabeling": {"password": "Token", "login": "User ID"},
},
"conn-fields": {
"use_ssl": {"label": "Use SSL", "schema": {"type": ["boolean", "null"], "default": False}}
},
},
{
"hook-class-name": "airflow.providers.apache.spark.hooks.spark_jdbc.SparkJDBCHook",
"connection-type": "spark_jdbc",
},
{
"hook-class-name": "airflow.providers.apache.spark.hooks.spark_sql.SparkSqlHook",
"connection-type": "spark_sql",
"ui-field-behaviour": {
"hidden-fields": ["schema", "login", "password", "extra"],
"relabeling": {},
},
"conn-fields": {
"queue": {
"label": "YARN queue",
"description": "Default YARN queue to use",
"schema": {"type": ["string", "null"]},
}
},
},
{
"hook-class-name": "airflow.providers.apache.spark.hooks.spark_submit.SparkSubmitHook",
"connection-type": "spark",
"ui-field-behaviour": {
"hidden-fields": ["schema", "login", "password", "extra"],
"relabeling": {},
"placeholders": {"keytab": "<base64 encoded Keytab Content>"},
},
"conn-fields": {
"queue": {
"label": "YARN queue",
"description": "Default YARN queue to use",
"schema": {"type": ["string", "null"]},
},
"deploy-mode": {
"label": "Deploy mode",
"description": "Must be client or cluster",
"schema": {"type": ["string", "null"], "default": "client"},
},
"spark-binary": {
"label": "Spark binary",
"description": "Must be one of: spark-submit, spark2-submit, spark3-submit",
"schema": {"type": ["string", "null"], "default": "spark-submit"},
},
"namespace": {"label": "Kubernetes namespace", "schema": {"type": ["string", "null"]}},
"principal": {"label": "Principal", "schema": {"type": ["string", "null"]}},
"keytab": {
"label": "Keytab",
"description": "Run the command `base64 <your-keytab-path>` and use its output.",
"schema": {"type": ["string", "null"], "format": "password"},
},
},
},
],
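# Connections of the types above are registered like any other Airflow
# connection. An illustrative example of adding a "spark" connection with the
# CLI (host, port, and extras are placeholder values); the extra keys match
# the conn-fields declared above:
#
#     airflow connections add spark_default \
#         --conn-type spark \
#         --conn-host spark://spark-master \
#         --conn-port 7077 \
#         --conn-extra '{"deploy-mode": "client", "spark-binary": "spark-submit"}'
#
# A "spark_connect" connection is defined the same way, with the token stored
# in the password field (relabeled "Token" in the UI per the
# ui-field-behaviour above).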
"task-decorators": [
{
"class-name": "airflow.providers.apache.spark.decorators.pyspark.pyspark_task",
"name": "pyspark",
}
],
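# The pyspark decorator is used as @task.pyspark and injects a SparkSession
# ("spark") and SparkContext ("sc") into the decorated callable. A minimal
# sketch, assuming a connection named "spark_default" exists:
#
#     from airflow.decorators import task
#
#     @task.pyspark(conn_id="spark_default")
#     def count_rows(spark, sc):
#         return spark.range(100).count()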
}