-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathiceberg-init.bash
More file actions
98 lines (58 loc) · 3.53 KB
/
iceberg-init.bash
File metadata and controls
98 lines (58 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# iceberg-init: interactively assemble and launch a `spark-sql` session whose
# catalog named `iceberg` is backed by Apache Iceberg.
#
# Questions are written to stdout and answered on stdin, in this order:
#   1. catalog  - [1] HDFS | [2] NESSIE | [3] GLUE | [4] ARCTIC
#   2. storage  - [1] local | [2] S3
#   3. warehouse location, then (S3 only) AWS credentials and region
#   4. catalog details (Nessie URL; for ARCTIC also a bearer token)
#
# Side effects: for S3 storage AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
# AWS_REGION and AWS_DEFAULT_REGION are exported so that the spark-sql child
# process inherits them. The final command line is echoed before it is run.
#
# Status: that of spark-sql, or 1 when an answer is missing or not a listed
# option (nothing is launched in that case).

#######################################
# Print the prompt lines, then read one answer from stdin.
# Arguments: $1 - name of the variable that receives the answer
#            $2.. - prompt lines, printed one per line on stdout
# Returns:   0 if a non-empty answer was read, 1 on EOF or an empty answer
#######################################
prompt_for() {
  local var_name=$1
  shift
  printf '%s\n' "$@"
  # -r keeps backslashes literal. The status of read is ignored on purpose:
  # a last line without a trailing newline still fills the variable, and a
  # real EOF leaves it empty, which is caught just below.
  read -r "$var_name" || true
  if [[ -z ${!var_name} ]]; then
    printf 'iceberg-init: no answer given for %s\n' "$var_name" >&2
    return 1
  fi
}

#######################################
# Run the questionnaire, build the spark-sql argument list and execute it.
# Globals:   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION,
#            AWS_DEFAULT_REGION (written and exported for S3 storage)
# Outputs:   prompts and the assembled command line on stdout,
#            diagnostics on stderr
# Returns:   spark-sql's status, or 1 on a missing/invalid answer
#######################################
main() {
  local catalog_choice storage_choice warehouse nessie_uri token
  local packages="org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.2.1"
  local aws_packages=""
  local -a io_conf=() catalog_conf=() spark_args=()

  prompt_for catalog_choice \
    "What catalog are you using?" \
    "[1] HDFS | [2] NESSIE | [3] GLUE | [4] ARCTIC" || return 1
  case "$catalog_choice" in
    1 | 2 | 3 | 4) ;;
    *)
      printf 'iceberg-init: unknown catalog selection: %s\n' "$catalog_choice" >&2
      return 1
      ;;
  esac

  prompt_for storage_choice \
    "Storing Locally or on S3?" \
    "[1] local | [2] S3" || return 1

  ######################################################
  # Storage backend: warehouse location plus, for S3, credentials, the S3
  # FileIO implementation and the AWS SDK packages.
  case "$storage_choice" in
    1)
      prompt_for warehouse \
        "What is the warehouse name? (folder to save data in)" \
        "example: datawarehouse" || return 1
      ;;
    2)
      prompt_for warehouse \
        "What is the warehouse path? (S3 PATH TO SAVE DATA INTO)" \
        "example: s3a://my_bucket/subfolder" || return 1
      prompt_for AWS_ACCESS_KEY_ID "What is your AWS_ACCESS_KEY_ID" || return 1
      prompt_for AWS_SECRET_ACCESS_KEY "What is your AWS_SECRET_ACCESS_KEY" || return 1
      prompt_for AWS_REGION "What is your S3 Region? (ex. us-east-1)" || return 1
      export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_REGION
      export AWS_DEFAULT_REGION="$AWS_REGION"
      io_conf=(--conf "spark.sql.catalog.iceberg.io-impl=org.apache.iceberg.aws.s3.S3FileIO")
      aws_packages=",software.amazon.awssdk:bundle:2.17.178,software.amazon.awssdk:url-connection-client:2.17.178"
      ;;
    *)
      printf 'iceberg-init: unknown storage selection: %s\n' "$storage_choice" >&2
      return 1
      ;;
  esac

  #####################################################
  # Catalog backend: extra packages and catalog-specific --conf pairs.
  case "$catalog_choice" in
    1)
      # The Hadoop catalog type is requested for local storage only. It is
      # deliberately not added to the catalog-impl based catalogs below:
      # Iceberg is understood to reject a catalog that sets both `type` and
      # `catalog-impl`.
      # NOTE(review): with S3 storage no catalog type is passed here, so
      # Iceberg uses its default type - confirm whether type=hadoop is
      # wanted for that combination as well.
      if [[ "$storage_choice" == 1 ]]; then
        io_conf=(--conf "spark.sql.catalog.iceberg.type=hadoop")
      fi
      ;;
    2)
      prompt_for nessie_uri "What is your Nessie Server URL" || return 1
      packages+=",org.projectnessie:nessie-spark-extensions-3.3_2.12:0.44.0"
      catalog_conf=(
        --conf "spark.sql.catalog.iceberg.catalog-impl=org.apache.iceberg.nessie.NessieCatalog"
        --conf "spark.sql.catalog.iceberg.uri=$nessie_uri"
        --conf "spark.sql.catalog.iceberg.ref=main"
      )
      warehouse+="/iceberg-warehouse"
      ;;
    3)
      catalog_conf=(
        --conf "spark.sql.catalog.iceberg.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog"
      )
      warehouse+="/iceberg-warehouse"
      ;;
    4)
      prompt_for nessie_uri "What is your Nessie Server URL" || return 1
      prompt_for token "What is your Nessie Auth Token" || return 1
      packages+=",org.projectnessie:nessie-spark-extensions-3.3_2.12:0.44.0"
      catalog_conf=(
        --conf "spark.sql.catalog.iceberg.catalog-impl=org.apache.iceberg.nessie.NessieCatalog"
        --conf "spark.sql.catalog.iceberg.uri=$nessie_uri"
        --conf "spark.sql.catalog.iceberg.ref=main"
        --conf "spark.sql.catalog.iceberg.authentication.type=BEARER"
        --conf "spark.sql.catalog.iceberg.authentication.token=$token"
      )
      warehouse+="/iceberg-warehouse"
      ;;
  esac

  # An argument array (instead of a string passed to eval) keeps every answer
  # a single word, so spaces or shell metacharacters typed by the user are
  # passed through verbatim rather than re-parsed by the shell.
  spark_args=(
    --packages "${packages}${aws_packages}"
    --conf "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"
    "${catalog_conf[@]}"
    --conf "spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkCatalog"
    --conf "spark.sql.catalog.iceberg.warehouse=$warehouse"
    "${io_conf[@]}"
  )

  # Show the command before running it.
  # NOTE: for ARCTIC this line contains the bearer token.
  printf '%s\n' "spark-sql ${spark_args[*]}"
  spark-sql "${spark_args[@]}"
}

main "$@"