init/jupyter/authorization/gravitino-access-control-example.ipynb (755 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "id": "df953e1a-b48a-4eda-b099-c89c9449fdad", "metadata": {}, "source": [ "## Gravitino access control" ] }, { "cell_type": "markdown", "source": [ "This demo shows that authorizing the Hive Catalog through Gravitino and then using Spark to query this hive datasource authenticates the user's operation, allowing or denying the user's operation. \n", " You can log in to the Apache Ranger admin service to see the permissions.\n", "\n", "+ Apache Ranger admin service: http://localhost:6080/, the login user name is `admin` and the password is `rangerR0cks!`.\n", "+ Apache Gravitino access control document: https://gravitino.apache.org/docs/latest/security/access-control" ], "metadata": { "collapsed": false }, "id": "d75740e99c5ed90e" }, { "cell_type": "markdown", "source": [ "### Add the manager to the metalake" ], "metadata": { "collapsed": false }, "id": "42a9fa39f5d9ef03" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "import requests\n", "import json\n", "\n", "headers = {\n", " 'Accept': 'application/vnd.gravitino.v1+json',\n", " 'Content-Type': 'application/json',\n", "}\n", "\n", "data = {\n", " \"name\": \"manager\"\n", "}\n", "\n", "response = requests.post('http://gravitino:8090/api/metalakes/metalake_demo/users', headers=headers, data=json.dumps(data))\n", "\n", "# print the response text (the content of the requested file):\n", "print(response.text)" ], "metadata": { "collapsed": false }, "id": "b09c15849e20a095" }, { "cell_type": "markdown", "source": [ "\n", "\n", "### Create a Hive catalog with Ranger authorization" ], "metadata": { "collapsed": false }, "id": "cd2dc99e370d1c71" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "import requests\n", "import json\n", "url = \"http://gravitino:8090/api/metalakes/metalake_demo/catalogs\"\n", "headers = {\n", " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", " \"Content-Type\": \"application/json\",\n", " \"Authorization\": \"Basic bWFuYWdlcjoxMjM=\",\n", "}\n", "data = {\n", " \"name\": \"catalog_hive_ranger\",\n", " \"type\": \"RELATIONAL\",\n", " \"provider\": \"hive\",\n", " \"comment\": \"comment\",\n", " \"properties\": {\n", " \"metastore.uris\": \"thrift://hive:9083\",\n", " \"authorization-provider\": \"ranger\",\n", " \"authorization.ranger.admin.url\": \"http://ranger:6080\",\n", " \"authorization.ranger.auth.type\": \"simple\",\n", " \"authorization.ranger.username\": \"admin\",\n", " \"authorization.ranger.password\": \"rangerR0cks!\",\n", " \"authorization.ranger.service.type\": \"HadoopSQL\",\n", " \"authorization.ranger.service.name\": \"hiveDev\"\n", " }\n", "}\n", "\n", "response = requests.post(url, headers=headers, data=json.dumps(data))\n", "\n", "print(response.text)\n" ], "metadata": { "collapsed": false }, "id": "c39ff0b34e25c2e4" }, { "cell_type": "markdown", "id": "6cdaf5b0-18e1-45df-8e8f-13376e41c421", "metadata": {}, "source": [ "### Install PySpark" ] }, { "cell_type": "code", "execution_count": null, "id": "380aa111-a955-4a48-835f-8a5402bf1a4a", "metadata": {}, "outputs": [], "source": [ "import pyspark\n", "import os\n", "from pyspark.sql import SparkSession\n", "os.environ['HADOOP_USER_NAME']=\"manager\"\n", "\n", "spark = SparkSession.builder \\\n", " .appName(\"PySpark SQL Example\") \\\n", " .config(\"spark.plugins\", \"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n", " .config(\"spark.jars\", \"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,\\\n", " /tmp/gravitino/packages/gravitino-spark-connector-runtime-3.4_2.12-0.8.0-incubating.jar,\\\n", " /tmp/gravitino/packages/kyuubi-spark-authz-shaded_2.12-1.9.2.jar\") \\\n", " .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n", " .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n", " .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n", " .config(\"spark.sql.catalog.catalog_rest\", \"org.apache.iceberg.spark.SparkCatalog\") \\\n", " .config(\"spark.sql.catalog.catalog_rest.type\", \"rest\") \\\n", " .config(\"spark.sql.catalog.catalog_rest.uri\", \"http://gravitino:9001/iceberg/\") \\\n", " .config(\"spark.locality.wait.node\", \"0\") \\\n", " .config(\"spark.driver.extraClassPath\", \"/tmp/gravitino\") \\\n", " .config(\"spark.sql.extensions\", \"org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension\") \\\n", " .config(\"spark.sql.warehouse.dir\", \"hdfs://hive:9000/user/hive/warehouse\") \\\n", " .enableHiveSupport() \\\n", " .getOrCreate()" ] }, { "cell_type": "markdown", "source": [ "### Show databases list under the catalog_hive" ], "metadata": { "collapsed": false }, "id": "499295c00807fb0d" }, { "cell_type": "code", "execution_count": null, "id": "d70e6c04-bb61-4b5b-8525-41a4a5a34b54", "metadata": {}, "outputs": [], "source": [ "spark.sql(\"USE catalog_hive_ranger\")\n", "spark.sql(\"SHOW DATABASES\").show()" ] }, { "cell_type": "markdown", "source": [ "### Create database access control" ], "metadata": { "collapsed": false }, "id": "e33c1f5bdee0b3b1" }, { "cell_type": "code", "execution_count": null, "id": "d2fae330-e932-4482-9622-bcc454844ff8", "metadata": {}, "outputs": [], "source": [ "spark.sql(\"CREATE DATABASE IF NOT EXISTS access_control;\")\n", "spark.sql(\"USE catalog_hive_ranger\")\n", "spark.sql(\"SHOW DATABASES\").show()" ] }, { "cell_type": "markdown", "source": [ "### Create table customers" ], "metadata": { "collapsed": false }, "id": "f416641cedd311ca" }, { "cell_type": "code", "execution_count": null, "id": "c34d2bfc-f13a-4dcc-ae40-f2a2874bfffa", "metadata": {}, "outputs": [], "source": [ "spark.sql(\"USE access_control;\")\n", "spark.sql(\"CREATE TABLE customers (customer_id int, customer_name string, customer_email string);\")\n", "spark.sql(\"SHOW TABLES\").show()" ] }, { "cell_type": "markdown", "source": [ "### Select and insert data for the table" ], "metadata": { "collapsed": false }, "id": "68adabbd976e106b" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','rory@123.com');\")\n", "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','jerry@dt.com');\")\n", "spark.sql(\"SELECT * FROM customers\").show()" ], "metadata": { "collapsed": false }, "id": "7113e44ad213ff45" }, { "cell_type": "markdown", "source": [ "### You should click the jupyter button to restart the notebook, we will start a new spark context with user lisa" ], "metadata": { "collapsed": false }, "id": "ee84f44711c7a939" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "import pyspark\n", "import os\n", "from pyspark.sql import SparkSession\n", "os.environ['HADOOP_USER_NAME']=\"lisa\"\n", "\n", "spark = SparkSession.builder \\\n", " .appName(\"PySpark SQL Example\") \\\n", " .config(\"spark.plugins\", \"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n", " .config(\"spark.jars\", \"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,\\\n", " /tmp/gravitino/packages/gravitino-spark-connector-runtime-3.4_2.12-0.8.0-incubating.jar,\\\n", " /tmp/gravitino/packages/kyuubi-spark-authz-shaded_2.12-1.9.2.jar\") \\\n", " .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n", " .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n", " .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n", " .config(\"spark.sql.catalog.catalog_rest\", \"org.apache.iceberg.spark.SparkCatalog\") \\\n", " .config(\"spark.sql.catalog.catalog_rest.type\", \"rest\") \\\n", " .config(\"spark.sql.catalog.catalog_rest.uri\", \"http://gravitino:9001/iceberg/\") \\\n", " .config(\"spark.locality.wait.node\", \"0\") \\\n", " .config(\"spark.driver.extraClassPath\", \"/tmp/gravitino\") \\\n", " .config(\"spark.sql.extensions\", \"org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension\") \\\n", " .config(\"spark.sql.warehouse.dir\", \"hdfs://hive:9000/user/hive/warehouse\") \\\n", " .enableHiveSupport() \\\n", " .getOrCreate()" ], "metadata": { "collapsed": false }, "id": "81f1b47f026aa59d" }, { "cell_type": "markdown", "source": [ "#### Add Spark execute user `lisa` into Gravitino\n", "+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#add-a-user" ], "metadata": { "collapsed": false }, "id": "8530bac14f93ef26" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "import requests\n", "import json\n", "\n", "headers = {\n", " 'Accept': 'application/vnd.gravitino.v1+json',\n", " 'Content-Type': 'application/json',\n", "}\n", "\n", "data = {\n", " \"name\": \"lisa\"\n", "}\n", "\n", "response = requests.post('http://gravitino:8090/api/metalakes/metalake_demo/users', headers=headers, data=json.dumps(data))\n", "\n", "# print the response text (the content of the requested file):\n", "print(response.text)" ], "metadata": { "collapsed": false }, "id": "25ca43caa7aa5a30" }, { "cell_type": "markdown", "source": [ "### Create a developer role" ], "metadata": { "collapsed": false }, "id": "23b6521d01362ba2" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "import requests\n", "import json\n", "\n", "url = \"http://gravitino:8090/api/metalakes/metalake_demo/roles\"\n", "headers = {\n", " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", " \"Content-Type\": \"application/json\",\n", "}\n", "data = {\n", " \"name\": \"developer\",\n", " \"properties\": {\"k1\": \"v1\"},\n", " \"securableObjects\": [\n", " {\n", " \"fullName\": \"catalog_hive_ranger\",\n", " \"type\": \"CATALOG\",\n", " \"privileges\": [\n", " {\n", " \"name\": \"USE_CATALOG\",\n", " \"condition\": \"ALLOW\"\n", " }\n", " ]\n", " },\n", " {\n", " \"fullName\": \"catalog_hive_ranger.access_control\",\n", " \"type\": \"SCHEMA\",\n", " \"privileges\": [\n", " {\n", " \"name\": \"USE_SCHEMA\",\n", " \"condition\": \"ALLOW\"\n", " },\n", " {\n", " \"name\": \"CREATE_TABLE\",\n", " \"condition\": \"ALLOW\"\n", " },\n", " {\n", " \"name\": \"MODIFY_TABLE\",\n", " \"condition\": \"ALLOW\"\n", " },\n", " {\n", " \"name\": \"SELECT_TABLE\",\n", " \"condition\": \"ALLOW\"\n", " }\n", " ]\n", " }\n", " ]\n", "}\n", "\n", "response = requests.post(url, headers=headers, data=json.dumps(data))\n", "\n", "print(response.text)" ], "metadata": { "collapsed": false }, "id": "60527c15252f9bc8" }, { "cell_type": "markdown", "source": [ "### Grant role to Spark execute user lisa\n", "+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#grant-roles-to-a-user" ], "metadata": { "collapsed": false }, "id": "ad03479cbb062c80" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "import requests\n", "import json\n", "\n", "url = \"http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/grant\"\n", "headers = {\n", " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", " \"Content-Type\": \"application/json\",\n", "}\n", "data = {\n", " \"roleNames\": [\"developer\"]\n", "}\n", "\n", "response = requests.put(url, headers=headers, data=json.dumps(data))\n", "\n", "# print status code and response text\n", "print(response.status_code)\n", "print(response.text)" ], "metadata": { "collapsed": false }, "id": "822c265e4981cf8d" }, { "cell_type": "markdown", "source": [ "### Select and insert data for the table" ], "metadata": { "collapsed": false }, "id": "342b2b8b96235f06" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "spark.sql(\"USE catalog_hive_ranger;\")\n", "spark.sql(\"USE access_control;\")\n", "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','rory@123.com');\")\n", "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','jerry@dt.com');\")\n", "spark.sql(\"SELECT * FROM customers\").show()" ], "metadata": { "collapsed": false }, "id": "968b1f56d7cf3167" }, { "cell_type": "markdown", "source": [ "### Create another table" ], "metadata": { "collapsed": false }, "id": "e633d159e006740d" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "spark.sql(\"CREATE TABLE another_customers (customer_id int, customer_name string, customer_email string);\")\n", "spark.sql(\"SHOW TABLES;\").show()" ], "metadata": { "collapsed": false }, "id": "e2b59346fbf058ce" }, { "cell_type": "markdown", "source": [ "### Succeed to drop his table" ], "metadata": { "collapsed": false }, "id": "1e5624cfef2048fd" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "spark.sql(\"DROP TABLE another_customers;\")\n", "spark.sql(\"SHOW TABLES;\").show()" ], "metadata": { "collapsed": false }, "id": "b62051095545975" }, { "cell_type": "markdown", "source": [ "### Fail to drop others' table" ], "metadata": { "collapsed": false }, "id": "161c5822bf5b3f67" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "from py4j.protocol import Py4JJavaError\n", "\n", "try:\n", " spark.sql(\"DROP TABLE customers;\")\n", "except Py4JJavaError as e:\n", " print(\"An error occurred: \", e.java_exception)" ], "metadata": { "collapsed": false }, "id": "3d8c1a7563009cdc" }, { "cell_type": "markdown", "source": [ "## Change another role for the user" ], "metadata": { "collapsed": false }, "id": "1a51eaa11e56d2f7" }, { "cell_type": "markdown", "source": [ "### Revoke role from Spark execute user lisa\n", "+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#revoke-roles-from-a-user" ], "metadata": { "collapsed": false }, "id": "5a6bd716b808b53d" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "import requests\n", "import json\n", "\n", "url = \"http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/revoke\"\n", "headers = {\n", " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", " \"Content-Type\": \"application/json\",\n", "}\n", "data = {\n", " \"roleNames\": [\"developer\"]\n", "}\n", "\n", "response = requests.put(url, headers=headers, data=json.dumps(data))\n", "\n", "# print status code and response text\n", "print(response.status_code)\n", "print(response.text)" ], "metadata": { "collapsed": false }, "id": "21229241aa84650a" }, { "cell_type": "markdown", "source": [ "### Create a analyst role" ], "metadata": { "collapsed": false }, "id": "947303c40f7e8835" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "import requests\n", "import json\n", "\n", "url = \"http://gravitino:8090/api/metalakes/metalake_demo/roles\"\n", "headers = {\n", " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", " \"Content-Type\": \"application/json\",\n", "}\n", "data = {\n", " \"name\": \"analyst\",\n", " \"properties\": {\"k1\": \"v1\"},\n", " \"securableObjects\": [\n", " {\n", " \"fullName\": \"catalog_hive_ranger\",\n", " \"type\": \"CATALOG\",\n", " \"privileges\": [\n", " {\n", " \"name\": \"USE_CATALOG\",\n", " \"condition\": \"ALLOW\"\n", " }\n", " ]\n", " },\n", " {\n", " \"fullName\": \"catalog_hive_ranger.access_control\",\n", " \"type\": \"SCHEMA\",\n", " \"privileges\": [\n", " {\n", " \"name\": \"USE_SCHEMA\",\n", " \"condition\": \"ALLOW\"\n", " },\n", " {\n", " \"name\": \"SELECT_TABLE\",\n", " \"condition\": \"ALLOW\"\n", " }\n", " ]\n", " }\n", " ]\n", "}\n", "\n", "response = requests.post(url, headers=headers, data=json.dumps(data))\n", "\n", "print(response.text)" ], "metadata": { "collapsed": false }, "id": "7117a353c5843a1e" }, { "cell_type": "markdown", "source": [ "### Grant a analyst to the user" ], "metadata": { "collapsed": false }, "id": "78a722b6539a73ba" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "import requests\n", "import json\n", "\n", "url = \"http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/grant\"\n", "headers = {\n", " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", " \"Content-Type\": \"application/json\",\n", "}\n", "data = {\n", " \"roleNames\": [\"analyst\"]\n", "}\n", "\n", "response = requests.put(url, headers=headers, data=json.dumps(data))\n", "\n", "# print status code and response text\n", "print(response.status_code)\n", "print(response.text)" ], "metadata": { "collapsed": false }, "id": "eab62ec65b36dbc6" }, { "cell_type": "markdown", "source": [ "### Succeed to select data from the table" ], "metadata": { "collapsed": false }, "id": "251a5c4001b7ecb3" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "spark.sql(\"SELECT * FROM customers\").show()" ], "metadata": { "collapsed": false }, "id": "9cfb4a73cd36e529" }, { "cell_type": "markdown", "source": [ "### Fail to insert the data to the table" ], "metadata": { "collapsed": false }, "id": "450de05e4252f16" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "from py4j.protocol import Py4JJavaError\n", "\n", "try:\n", " spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','rory@123.com');\")\n", " spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','jerry@dt.com');\")\n", "except Py4JJavaError as e:\n", " print(\"An error occurred: \", e.java_exception)" ], "metadata": { "collapsed": false }, "id": "de7d35aa98cb3001" } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }