-
Notifications
You must be signed in to change notification settings - Fork 0
/
cleanup.sql
54 lines (46 loc) · 4.25 KB
/
cleanup.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
-- Script for partially anonymizing and cleaning up the dump.
-- WARNING: The script is not guaranteed to cover any kind of metastore
-- (not even close). It is mostly suitable for cases encountered so far.
-- Statements to remove potentially sensitive URIs and usernames.
UPDATE "CTLGS" SET "LOCATION_URI" = REGEXP_REPLACE("LOCATION_URI",'(s3a|hdfs)://[^/]+','hdfs://localhost:40889');
UPDATE "DBS" SET "DB_LOCATION_URI" = REGEXP_REPLACE("DB_LOCATION_URI",'(s3a|hdfs)://[^/]+','hdfs://localhost:40889');
UPDATE "DBS" SET "OWNER_NAME" = 'hive' WHERE "OWNER_NAME" IS NOT NULL AND "OWNER_NAME" <> 'public';
UPDATE "SDS" SET "LOCATION" = REGEXP_REPLACE("LOCATION",'(s3a|hdfs)://[^/]+','hdfs://localhost:40889') WHERE "LOCATION" IS NOT NULL;
UPDATE "TBLS" SET "OWNER" = 'hive' WHERE "OWNER" IS NOT NULL AND "OWNER" <> 'public';
-- Statements to cleanup transactions related state in metastore
TRUNCATE TABLE "HIVE_LOCKS" CASCADE ;
TRUNCATE TABLE "NOTIFICATION_LOG";
TRUNCATE TABLE "TXNS" CASCADE;
TRUNCATE TABLE "TXN_TO_WRITE_ID"CASCADE;
TRUNCATE TABLE "TXN_COMPONENTS" CASCADE;
TRUNCATE TABLE "COMPLETED_TXN_COMPONENTS" CASCADE;
TRUNCATE TABLE "NEXT_WRITE_ID" CASCADE;
TRUNCATE TABLE "NEXT_LOCK_ID" CASCADE;
TRUNCATE TABLE "COMPACTION_QUEUE" CASCADE;
TRUNCATE TABLE "NEXT_COMPACTION_QUEUE_ID" CASCADE;
TRUNCATE TABLE "COMPLETED_COMPACTIONS" CASCADE;
TRUNCATE TABLE "AUX_TABLE" CASCADE;
TRUNCATE TABLE "WRITE_SET" CASCADE;
TRUNCATE TABLE "REPL_TXN_MAP" CASCADE;
TRUNCATE TABLE "MATERIALIZATION_REBUILD_LOCKS" CASCADE;
TRUNCATE TABLE "MIN_HISTORY_LEVEL" CASCADE;
INSERT INTO "NEXT_LOCK_ID" VALUES(1);
INSERT INTO "NEXT_COMPACTION_QUEUE_ID" VALUES(1);
-- Statements to replace Hive default database with tpcds
DELETE FROM "TABLE_PARAMS" WHERE "TBL_ID" IN (SELECT "TBLS"."TBL_ID" FROM "TBLS" WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "NAME" = 'default'));
DELETE FROM "TAB_COL_STATS" WHERE "TBL_ID" IN (SELECT "TBLS"."TBL_ID" FROM "TBLS" WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "NAME" = 'default'));
DELETE FROM "MV_TABLES_USED" WHERE "TBL_ID" IN (SELECT "TBLS"."TBL_ID" FROM "TBLS" WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "NAME" = 'default'));
DELETE FROM "TBL_PRIVS" WHERE "TBL_ID" IN (SELECT "TBLS"."TBL_ID" FROM "TBLS" WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "NAME" = 'default'));
DELETE FROM "PARTITION_KEYS" WHERE "TBL_ID" IN (SELECT "TBLS"."TBL_ID" FROM "TBLS" WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "NAME" = 'default'));
DELETE FROM "PARTITION_KEY_VALS" WHERE "PART_ID" IN (SELECT "PART_ID" FROM "PARTITIONS" WHERE "TBL_ID" IN (SELECT "TBLS"."TBL_ID" FROM "TBLS" WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "NAME" = 'default')));
DELETE FROM "PARTITION_PARAMS" WHERE "PART_ID" IN (SELECT "PART_ID" FROM "PARTITIONS" WHERE "TBL_ID" IN (SELECT "TBLS"."TBL_ID" FROM "TBLS" WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "NAME" = 'default')));
DELETE FROM "PART_COL_STATS" WHERE "PART_ID" IN (SELECT "PART_ID" FROM "PARTITIONS" WHERE "TBL_ID" IN (SELECT "TBLS"."TBL_ID" FROM "TBLS" WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "NAME" = 'default')));
DELETE FROM "PARTITIONS" WHERE "TBL_ID" IN (SELECT "TBLS"."TBL_ID" FROM "TBLS" WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "NAME" = 'default'));
DELETE FROM "KEY_CONSTRAINTS" WHERE "PARENT_TBL_ID" IN (SELECT "TBLS"."TBL_ID" FROM "TBLS" WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "NAME" = 'default'));
DELETE FROM "KEY_CONSTRAINTS" WHERE "CHILD_TBL_ID" IN (SELECT "TBLS"."TBL_ID" FROM "TBLS" WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "NAME" = 'default'));
DELETE FROM "TBLS" WHERE "DB_ID" IN (SELECT "DB_ID" FROM "DBS" WHERE "NAME" = 'default');
UPDATE "TBLS" SET "DB_ID" = (SELECT "DB_ID" FROM "DBS" WHERE "NAME" = 'default') WHERE EXISTS (SELECT 1 FROM "DBS" WHERE "DBS"."DB_ID"="TBLS"."DB_ID" AND "DBS"."NAME" LIKE 'tpcds%');
TRUNCATE "DATABASE_PARAMS";
DELETE FROM "DBS" WHERE "NAME" LIKE 'tpcds%';
UPDATE "TAB_COL_STATS" SET "DB_NAME" = 'default' WHERE "DB_NAME" LIKE 'tpcds%';
UPDATE "PART_COL_STATS" SET "DB_NAME" = 'default' WHERE "DB_NAME" LIKE 'tpcds%';