## Gravitino access control

This demo shows that, after the Hive catalog is authorized through Gravitino, each operation a user runs with Spark against this Hive data source is checked against the user's privileges and is allowed or denied accordingly.
You can log in to the Apache Ranger admin service to see the permissions.

+ Apache Ranger admin service: http://localhost:6080/, the login user name is `admin` and the password is `rangerR0cks!`.
+ Apache Gravitino access control document: https://gravitino.apache.org/docs/latest/security/access-control

### Add the manager to the metalake

In [None]:
import requests
import json

# Register the user "manager" in the metalake `metalake_demo` through the
# Gravitino REST API.
headers = {
    "Accept": "application/vnd.gravitino.v1+json",
    "Content-Type": "application/json",
}
data = dict(name="manager")

response = requests.post(
    "http://gravitino:8090/api/metalakes/metalake_demo/users",
    headers=headers,
    data=json.dumps(data),
)

# Show the raw response body returned by the server.
print(response.text)



### Create a Hive catalog with Ranger authorization

In [None]:
import requests
import json
# Create the relational Hive catalog `catalog_hive_ranger` in `metalake_demo`
# with Ranger set as its authorization provider.
url = "http://gravitino:8090/api/metalakes/metalake_demo/catalogs"
headers = {
    "Accept": "application/vnd.gravitino.v1+json",
    "Content-Type": "application/json",
    # HTTP Basic credentials; the token is the base64 encoding of "manager:123".
    "Authorization": "Basic bWFuYWdlcjoxMjM=",
}

# Top-level catalog attributes first ...
data = {
    "name": "catalog_hive_ranger",
    "type": "RELATIONAL",
    "provider": "hive",
    "comment": "comment",
}
# ... then the Hive metastore endpoint and the Ranger connection settings.
data["properties"] = {
    "metastore.uris": "thrift://hive:9083",
    "authorization-provider": "ranger",
    "authorization.ranger.admin.url": "http://ranger:6080",
    "authorization.ranger.auth.type": "simple",
    "authorization.ranger.username": "admin",
    "authorization.ranger.password": "rangerR0cks!",
    "authorization.ranger.service.type": "HadoopSQL",
    "authorization.ranger.service.name": "hiveDev",
}

response = requests.post(url, headers=headers, data=json.dumps(data))

# Show the raw response body returned by the server.
print(response.text)


### Start a Spark session as user `manager`

In [None]:
import pyspark
import os
from pyspark.sql import SparkSession
# Set the Hadoop user name to `manager` before the Spark session is built.
os.environ["HADOOP_USER_NAME"] = "manager"

# Jars passed to Spark through `spark.jars` (a comma-separated list).
# Joining the paths with "," keeps the value free of the indentation
# whitespace that a backslash continuation inside a string literal would
# embed in front of every path after the first.
spark_jars = ",".join([
    "/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar",
    "/tmp/gravitino/packages/gravitino-spark-connector-runtime-3.4_2.12-0.8.0-incubating.jar",
    "/tmp/gravitino/packages/kyuubi-spark-authz-shaded_2.12-1.9.2.jar",
])

# Build (or reuse) the session with the Gravitino connector plugin, the
# Iceberg REST catalog `catalog_rest`, and the Kyuubi Ranger authorization
# extension enabled.
spark = (
    SparkSession.builder
    .appName("PySpark SQL Example")
    .config("spark.plugins", "org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin")
    .config("spark.jars", spark_jars)
    .config("spark.sql.gravitino.uri", "http://gravitino:8090")
    .config("spark.sql.gravitino.metalake", "metalake_demo")
    .config("spark.sql.gravitino.enableIcebergSupport", "true")
    .config("spark.sql.catalog.catalog_rest", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.catalog_rest.type", "rest")
    .config("spark.sql.catalog.catalog_rest.uri", "http://gravitino:9001/iceberg/")
    .config("spark.locality.wait.node", "0")
    .config("spark.driver.extraClassPath", "/tmp/gravitino")
    .config("spark.sql.extensions", "org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension")
    .config("spark.sql.warehouse.dir", "hdfs://hive:9000/user/hive/warehouse")
    .enableHiveSupport()
    .getOrCreate()
)

### Show the database list under the catalog `catalog_hive_ranger`

In [None]:
# Make `catalog_hive_ranger` the current catalog, then list its databases.
spark.sql("USE catalog_hive_ranger")
databases = spark.sql("SHOW DATABASES")
databases.show()

### Create database access control

In [None]:
# Select the catalog first, so the database is created in
# `catalog_hive_ranger` even when this cell is run on its own rather than
# relying on an earlier cell having switched the catalog.
spark.sql("USE catalog_hive_ranger")
spark.sql("CREATE DATABASE IF NOT EXISTS access_control;")

# List the databases to confirm that `access_control` is present.
spark.sql("SHOW DATABASES").show()

### Create table customers

In [None]:
# Switch to the `access_control` database in the current catalog.
spark.sql("USE access_control;")

# IF NOT EXISTS lets the cell be re-run without failing when the table is
# already there, matching the CREATE DATABASE IF NOT EXISTS statement used
# for the database.
spark.sql("CREATE TABLE IF NOT EXISTS customers (customer_id int, customer_name string, customer_email string);")

# List the tables to confirm that `customers` is present.
spark.sql("SHOW TABLES").show()

### Select and insert data for the table

In [None]:
# Insert two rows into `customers`, one statement at a time, in order.
insert_statements = [
    "INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','rory@123.com');",
    "INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','jerry@dt.com');",
]
for statement in insert_statements:
    spark.sql(statement)

# Read the table back and display its rows.
customers = spark.sql("SELECT * FROM customers")
customers.show()

### Click the Jupyter restart button to restart the notebook kernel; we will then start a new Spark context as user `lisa`

In [None]:
import pyspark
import os
from pyspark.sql import SparkSession
# Set the Hadoop user name to `lisa` before the Spark session is built.
os.environ["HADOOP_USER_NAME"] = "lisa"

# Jars passed to Spark through `spark.jars` (a comma-separated list).
# Joining the paths with "," keeps the value free of the indentation
# whitespace that a backslash continuation inside a string literal would
# embed in front of every path after the first.
spark_jars = ",".join([
    "/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar",
    "/tmp/gravitino/packages/gravitino-spark-connector-runtime-3.4_2.12-0.8.0-incubating.jar",
    "/tmp/gravitino/packages/kyuubi-spark-authz-shaded_2.12-1.9.2.jar",
])

# Build the session with the Gravitino connector plugin, the Iceberg REST
# catalog `catalog_rest`, and the Kyuubi Ranger authorization extension.
# NOTE: getOrCreate() hands back an already-running session if one exists in
# this kernel, so the kernel should be restarted first for the new user name
# to apply.
spark = (
    SparkSession.builder
    .appName("PySpark SQL Example")
    .config("spark.plugins", "org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin")
    .config("spark.jars", spark_jars)
    .config("spark.sql.gravitino.uri", "http://gravitino:8090")
    .config("spark.sql.gravitino.metalake", "metalake_demo")
    .config("spark.sql.gravitino.enableIcebergSupport", "true")
    .config("spark.sql.catalog.catalog_rest", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.catalog_rest.type", "rest")
    .config("spark.sql.catalog.catalog_rest.uri", "http://gravitino:9001/iceberg/")
    .config("spark.locality.wait.node", "0")
    .config("spark.driver.extraClassPath", "/tmp/gravitino")
    .config("spark.sql.extensions", "org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension")
    .config("spark.sql.warehouse.dir", "hdfs://hive:9000/user/hive/warehouse")
    .enableHiveSupport()
    .getOrCreate()
)

#### Add Spark execute user `lisa` into Gravitino
+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#add-a-user

In [None]:
import requests
import json

# Register the user "lisa" in the metalake `metalake_demo` through the
# Gravitino REST API.
headers = {
    "Accept": "application/vnd.gravitino.v1+json",
    "Content-Type": "application/json",
}
data = dict(name="lisa")

response = requests.post(
    "http://gravitino:8090/api/metalakes/metalake_demo/users",
    headers=headers,
    data=json.dumps(data),
)

# Show the raw response body returned by the server.
print(response.text)

### Create a developer role

In [None]:
import requests
import json

# Create the role `developer`: it may use the catalog, and within the schema
# `access_control` it may use the schema and create, modify and select tables.
url = "http://gravitino:8090/api/metalakes/metalake_demo/roles"
headers = {
    "Accept": "application/vnd.gravitino.v1+json",
    "Content-Type": "application/json",
}
data = {
    "name": "developer",
    "properties": {"k1": "v1"},
    "securableObjects": [
        {
            "fullName": "catalog_hive_ranger",
            "type": "CATALOG",
            "privileges": [{"name": "USE_CATALOG", "condition": "ALLOW"}],
        },
        {
            "fullName": "catalog_hive_ranger.access_control",
            "type": "SCHEMA",
            # One ALLOW entry per schema-level privilege, in this order.
            "privileges": [
                {"name": privilege, "condition": "ALLOW"}
                for privilege in (
                    "USE_SCHEMA",
                    "CREATE_TABLE",
                    "MODIFY_TABLE",
                    "SELECT_TABLE",
                )
            ],
        },
    ],
}

response = requests.post(url, headers=headers, data=json.dumps(data))

# Show the raw response body returned by the server.
print(response.text)

### Grant role to Spark execute user lisa
+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#grant-roles-to-a-user

In [None]:
import requests
import json

# Grant the role `developer` to the user `lisa`.
url = "http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/grant"
headers = {
    "Accept": "application/vnd.gravitino.v1+json",
    "Content-Type": "application/json",
}
data = dict(roleNames=["developer"])

response = requests.put(
    url,
    headers=headers,
    data=json.dumps(data),
)

# Show the HTTP status code followed by the raw response body.
for line in (response.status_code, response.text):
    print(line)

### Select and insert data for the table

In [None]:
# Select the catalog and database, then insert two rows into `customers`;
# the statements run one at a time, in the listed order.
statements = [
    "USE catalog_hive_ranger;",
    "USE access_control;",
    "INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','rory@123.com');",
    "INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','jerry@dt.com');",
]
for statement in statements:
    spark.sql(statement)

# Read the table back and display its rows.
customers = spark.sql("SELECT * FROM customers")
customers.show()

### Create another table

In [None]:
# Create a second table, `another_customers`, in the current database.
create_another_customers = "CREATE TABLE another_customers (customer_id int, customer_name string, customer_email string);"
spark.sql(create_another_customers)

# List the tables of the current database.
tables = spark.sql("SHOW TABLES;")
tables.show()

### Succeed in dropping the user's own table

In [None]:
# Drop `another_customers`, then list the tables of the current database.
drop_another_customers = "DROP TABLE another_customers;"
spark.sql(drop_another_customers)

tables = spark.sql("SHOW TABLES;")
tables.show()

### Fail to drop another user's table

In [None]:
from py4j.protocol import Py4JJavaError

# Attempt to drop `customers`; if the JVM side raises, print the Java
# exception instead of letting the cell fail.
drop_customers = "DROP TABLE customers;"
try:
    spark.sql(drop_customers)
except Py4JJavaError as java_error:
    print("An error occurred: ", java_error.java_exception)

## Switch the user to another role

### Revoke role from Spark execute user lisa
+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#revoke-roles-from-a-user

In [None]:
import requests
import json

# Revoke the role `developer` from the user `lisa`.
url = "http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/revoke"
headers = {
    "Accept": "application/vnd.gravitino.v1+json",
    "Content-Type": "application/json",
}
data = dict(roleNames=["developer"])

response = requests.put(
    url,
    headers=headers,
    data=json.dumps(data),
)

# Show the HTTP status code followed by the raw response body.
for line in (response.status_code, response.text):
    print(line)

### Create an analyst role

In [None]:
import requests
import json

# Create the role `analyst`: it may use the catalog, and within the schema
# `access_control` it may only use the schema and select from tables.
url = "http://gravitino:8090/api/metalakes/metalake_demo/roles"
headers = {
    "Accept": "application/vnd.gravitino.v1+json",
    "Content-Type": "application/json",
}
data = {
    "name": "analyst",
    "properties": {"k1": "v1"},
    "securableObjects": [
        {
            "fullName": "catalog_hive_ranger",
            "type": "CATALOG",
            "privileges": [{"name": "USE_CATALOG", "condition": "ALLOW"}],
        },
        {
            "fullName": "catalog_hive_ranger.access_control",
            "type": "SCHEMA",
            # One ALLOW entry per schema-level privilege, in this order.
            "privileges": [
                {"name": privilege, "condition": "ALLOW"}
                for privilege in ("USE_SCHEMA", "SELECT_TABLE")
            ],
        },
    ],
}

response = requests.post(url, headers=headers, data=json.dumps(data))

# Show the raw response body returned by the server.
print(response.text)

### Grant the analyst role to the user

In [None]:
import requests
import json

# Grant the role `analyst` to the user `lisa`.
url = "http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/grant"
headers = {
    "Accept": "application/vnd.gravitino.v1+json",
    "Content-Type": "application/json",
}
data = dict(roleNames=["analyst"])

response = requests.put(
    url,
    headers=headers,
    data=json.dumps(data),
)

# Show the HTTP status code followed by the raw response body.
for line in (response.status_code, response.text):
    print(line)

### Succeed in selecting data from the table

In [None]:
# Query every row of `customers` and display the result.
customers = spark.sql("SELECT * FROM customers")
customers.show()

### Fail to insert data into the table

In [None]:
from py4j.protocol import Py4JJavaError

# Attempt two inserts into `customers`. The first JVM-side failure stops the
# sequence and its Java exception is printed instead of failing the cell.
insert_statements = (
    "INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','rory@123.com');",
    "INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','jerry@dt.com');",
)
try:
    for statement in insert_statements:
        spark.sql(statement)
except Py4JJavaError as java_error:
    print("An error occurred: ", java_error.java_exception)