Amazon Redshift便利ツール『amazon-redshift-utils』の便利SQLスクリプト紹介(AdminViews編)

2015.06.20

この記事は公開されてから1年以上経過しています。情報が古い可能性がありますので、ご注意ください。

Amazon Redshiftでは日々の作業を行う上で様々な情報を必要とし、その過程で『こういう情報が欲しい』という局面が多々あります。当ブログでも適宜『便利系SQL』として必要な情報を取得する為のSQLをご紹介して来ましたが、以下のGitHub上で展開されている各種スクリプトがとても有用そうでしたので2回に分けて中身を見てみたいと思います。当エントリは『管理系ビュー(AdminViews)』に関する内容となります。

目次

事前準備

当エントリで実行されるSQL文では、adminというスキーマを別途用意し、そこにビューを作成しています。なので事前にadminスキーマは用意しておいてください。

# CREATE SCHEMA admin;
CREATE SCHEMA

v_check_data_distribution.sql

スライス間のデータ分散状況を取得します。分散キーが適切な効果を発揮しているか確認するのに便利ですね。

CREATE OR REPLACE VIEW admin.v_check_data_distribution
AS
SELECT 
	slice
	,pgn.oid AS schema_oid
	,pgn.nspname AS schemaname
	,id AS tbl_oid
	,name AS tablename
	,rows AS rowcount_on_slice
	,SUM(rows) OVER (PARTITION BY name) AS total_rowcount
	,CASE
		WHEN rows IS NULL OR rows = 0 THEN 0
		ELSE ROUND(CAST(rows AS FLOAT) / CAST((SUM(rows) OVER (PARTITION BY id)) AS FLOAT) * 100, 3) 
		END AS distrib_pct
	,CASE
		WHEN rows IS NULL OR rows = 0 THEN 0
		ELSE ROUND(CAST((MIN(rows) OVER (PARTITION BY id)) AS FLOAT) / CAST((SUM(rows) OVER (PARTITION BY id)) AS FLOAT) * 100, 3) 
		END AS min_distrib_pct
	,CASE
		WHEN rows IS NULL OR rows = 0 THEN 0
		ELSE ROUND(CAST((MAX(rows) OVER (PARTITION BY id)) AS FLOAT) / CAST((SUM(rows) OVER (PARTITION BY id)) AS FLOAT) * 100, 3) 
		END AS max_distrib_pct
FROM 
	stv_tbl_perm AS perm
INNER JOIN
	pg_class AS pgc 
		ON pgc.oid = perm.id
INNER JOIN
	pg_namespace AS pgn 
		ON pgn.oid = pgc.relnamespace
WHERE slice < 3201
;

v_constraint_dependency.sql

テーブル間の外部キー制約に関する情報を取得します。

v_generate_group_ddl.sql

グループ作成用のDDL(CREATE GROUP〜)を作成します。

v_generate_schema_ddl.sql

スキーマ作成用のDDL(CREATE SCHEMA〜)を作成します。

v_generate_tbl_ddl.sql

テーブル作成用のDDL(CREATE TABLE〜)を作成します。以前私の方でもチャレンジした(そして志半ばで倒れたw)以下のエントリを完遂した形になっているようです。これは感動モノですね。・゚・(ノ∀`)・゚・。

CREATE OR REPLACE VIEW admin.v_generate_tbl_ddl
AS
SELECT 
 schemaname
 ,tablename
 ,seq
 ,ddl
FROM
 (
 SELECT
  schemaname
  ,tablename
  ,seq
  ,ddl
 FROM
  (
  --DROP TABLE
  SELECT 
   n.nspname AS schemaname
   ,c.relname AS tablename
   ,1 AS seq
   ,'--DROP TABLE ' + n.nspname + '.' + c.relname + ';' AS ddl 
  FROM pg_namespace AS n
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
  WHERE c.relkind = 'r'
  --CREATE TABLE
  UNION SELECT 
   n.nspname AS schemaname
   ,c.relname AS tablename
   ,2 AS seq
   ,'CREATE TABLE ' + n.nspname + '.' + c.relname AS ddl
  FROM pg_namespace AS n
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
  WHERE c.relkind = 'r'
  --OPEN PAREN COLUMN LIST
  UNION SELECT n.nspname AS schemaname, c.relname AS tablename, 5 AS seq, '(' AS ddl
  FROM pg_namespace AS n
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
  WHERE c.relkind = 'r'    
  --COLUMN LIST
  UNION SELECT 
   schemaname
   ,tablename
   ,seq
   ,'\t' + col_delim + '"' + col_name + '"' + ' ' + col_datatype + ' ' + col_nullable + ' ' + col_default + ' ' + col_encoding AS ddl
  FROM
   (
   SELECT
    n.nspname AS schemaname
    ,c.relname AS tablename
    ,100000000 + a.attnum AS seq
    ,CASE WHEN a.attnum > 1 THEN ',' ELSE '' END AS col_delim
    ,a.attname AS col_name
    ,CASE WHEN STRPOS(UPPER(format_type(a.atttypid, a.atttypmod)), 'CHARACTER VARYING') > 0
      THEN REPLACE(UPPER(format_type(a.atttypid, a.atttypmod)), 'CHARACTER VARYING', 'VARCHAR')
     WHEN STRPOS(UPPER(format_type(a.atttypid, a.atttypmod)), 'CHARACTER') > 0
      THEN REPLACE(UPPER(format_type(a.atttypid, a.atttypmod)), 'CHARACTER', 'CHAR')
     ELSE UPPER(format_type(a.atttypid, a.atttypmod))
     END AS col_datatype
    ,CASE WHEN format_encoding((a.attencodingtype)::integer) = 'none'
     THEN ''
     ELSE 'ENCODE ' + format_encoding((a.attencodingtype)::integer)
     END AS col_encoding
    ,CASE WHEN a.atthasdef IS TRUE THEN 'DEFAULT ' + adef.adsrc ELSE '' END AS col_default
    ,CASE WHEN a.attnotnull IS TRUE THEN 'NOT NULL' ELSE '' END AS col_nullable
   FROM pg_namespace AS n
   INNER JOIN pg_class AS c ON n.oid = c.relnamespace
   INNER JOIN pg_attribute AS a ON c.oid = a.attrelid
   LEFT OUTER JOIN pg_attrdef AS adef ON a.attrelid = adef.adrelid AND a.attnum = adef.adnum
   WHERE c.relkind = 'r'
     AND a.attnum > 0 
   ORDER BY a.attnum
   )
  --CONSTRAINT LIST
  UNION (SELECT 
   n.nspname AS schemaname
   ,c.relname AS tablename
   ,200000000 + CAST(con.oid AS INT) AS seq
   ,'\t,' + pg_get_constraintdef(con.oid) AS ddl
  FROM pg_constraint AS con
  INNER JOIN pg_class AS c ON c.relnamespace = con.connamespace AND c.relfilenode = con.conrelid
  INNER JOIN pg_namespace AS n ON n.oid = c.relnamespace 
  WHERE c.relkind = 'r'
  ORDER BY seq)
  --CLOSE PAREN COLUMN LIST
  UNION SELECT n.nspname AS schemaname, c.relname AS tablename, 299999999 AS seq, ')' AS ddl
  FROM pg_namespace AS n
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
  WHERE c.relkind = 'r'
  --DISTSTYLE
  UNION SELECT
   n.nspname AS schemaname
   ,c.relname AS tablename
   ,300000000 AS seq
   ,CASE WHEN c.reldiststyle = 0 THEN 'DISTSTYLE EVEN'
    WHEN c.reldiststyle = 1 THEN 'DISTSTYLE KEY'
    WHEN c.reldiststyle = 8 THEN 'DISTSTYLE ALL'
    ELSE '<<Error - UNKNOWN DISTSTYLE>>'
    END AS ddl
  FROM pg_namespace AS n
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
  WHERE c.relkind = 'r'
  --DISTKEY COLUMNS
  UNION SELECT
   n.nspname AS schemaname
   ,c.relname AS tablename
   ,400000000 + a.attnum AS seq
   ,'DISTKEY (' + a.attname + ')' AS ddl
  FROM pg_namespace AS n
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
  INNER JOIN pg_attribute AS a ON c.oid = a.attrelid
  WHERE c.relkind = 'r'
    AND a.attisdistkey IS TRUE
    AND a.attnum > 0 
  --SORTKEY COLUMNS
  UNION select schemaname, tablename, seq, 
       case when min_sort <0 then 'INTERLEAVED SORTKEY (' else 'SORTKEY (' end as ddl
from (SELECT 
   n.nspname AS schemaname
   ,c.relname AS tablename
   ,499999999 AS seq
   ,min(attsortkeyord) min_sort FROM pg_namespace AS n
  INNER JOIN  pg_class AS c ON n.oid = c.relnamespace
  INNER JOIN pg_attribute AS a ON c.oid = a.attrelid
  WHERE c.relkind = 'r'
  AND abs(a.attsortkeyord) > 0
  AND a.attnum > 0 
  group by 1,2,3 ) 
  UNION (SELECT
   n.nspname AS schemaname
   ,c.relname AS tablename
   ,500000000 + abs(a.attsortkeyord) AS seq
   ,CASE WHEN abs(a.attsortkeyord) = 1 
    THEN '\t' + a.attname 
    ELSE '\t,' + a.attname 
    END AS ddl
  FROM  pg_namespace AS n
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
  INNER JOIN pg_attribute AS a ON c.oid = a.attrelid
  WHERE c.relkind = 'r'
    AND abs(a.attsortkeyord) > 0
    AND a.attnum > 0 
  ORDER BY abs(a.attsortkeyord))
  UNION SELECT
   n.nspname AS schemaname
   ,c.relname AS tablename
   ,599999999 AS seq
   ,'\t)' AS ddl
  FROM pg_namespace AS n 
  INNER JOIN  pg_class AS c ON n.oid = c.relnamespace
  INNER JOIN  pg_attribute AS a ON c.oid = a.attrelid
  WHERE c.relkind = 'r'
    AND a.attsortkeyord > 0
    AND a.attnum > 0 
  --END SEMICOLON
  UNION SELECT n.nspname AS schemaname, c.relname AS tablename, 600000000 AS seq, ';' AS ddl
  FROM  pg_namespace AS n
  INNER JOIN pg_class AS c ON n.oid = c.relnamespace
  WHERE c.relkind = 'r'
  )
 ORDER BY schemaname, tablename, seq
 )
;

v_generate_unload_copy_cmd.sql

テーブルのUNLOADコマンドを生成します。

v_generate_user_object_permissions.sql

現状のオブジェクトに対する権限付与文(GRANT〜)を生成します。(※admin.v_get_obj_priv_by_userを事前に作成しておく必要があるようです。)

v_generate_view_ddl.sql

ビュー作成SQL文(CREATE VIEW〜)を生成します。

CREATE OR REPLACE VIEW admin.v_generate_view_ddl
AS
SELECT 
    n.nspname AS schemaname
    ,c.relname AS viewname
    ,'--DROP VIEW ' + n.nspname + '.' + c.relname + ';\nCREATE OR REPLACE VIEW ' + n.nspname + '.' + c.relname + ' AS\n' + COALESCE(pg_get_viewdef(c.oid, TRUE), '') AS ddl
FROM 
    pg_catalog.pg_class AS c
INNER JOIN
    pg_catalog.pg_namespace AS n ON c.relnamespace = n.oid
WHERE relkind = 'v'
;

v_get_obj_priv_by_user.sql

オブジェクトに対するユーザーの権限情報一覧を表示します。

v_get_schema_priv_by_user.sql

スキーマに対するユーザーの権限状況一覧を表示します。

v_get_tbl_priv_by_user.sql

テーブルに対するユーザーの権限情報一覧を表示します。

v_get_users_in_group.sql

ユーザーとユーザーが属するグループの一覧情報を表示します。

v_get_view_priv_by_user.sql

ビューと、そのビューに於けるユーザーのアクセス権限を表示します。

v_object_dependency.sql

オブジェクトの依存関係に関する情報を表示します。(※v_view_dependencyを先に作成しておく必要があるようです)

v_open_session.sql

クラスタのセッションに関する情報を表示します。

v_session_leakage_by_cnt.sql

ホスト名・ユーザー名単位での接続(connects)・切断(disconnects)に関する情報を表示します。

v_space_used_per_tbl.sql

テーブル毎の件数・容量・ソート状況を表示します。

CREATE OR REPLACE VIEW admin.v_space_used_per_tbl
AS
  SELECT
     TRIM(pgdb.datname) AS dbase_name
    ,TRIM(pgn.nspname) as schemaname
    ,TRIM(a.name) AS tablename
    ,id AS tbl_oid
    ,b.mbytes AS megabytes
    ,a.rows AS rowcount
    ,a.unsorted_rows AS unsorted_rowcount
    ,CASE WHEN a.rows = 0 then 0
     ELSE ROUND((a.unsorted_rows::FLOAT / a.rows::FLOAT) * 100, 5)
     END AS pct_unsorted
    ,CASE WHEN a.rows = 0 THEN 'n/a'
     WHEN (a.unsorted_rows::FLOAT / a.rows::FLOAT) * 100 >= 20 THEN 'VACUUM recommended'
     ELSE 'n/a'
     END AS recommendation
  FROM
    (
      SELECT
        db_id
        ,id
        ,name
        ,SUM(rows) AS rows
        ,SUM(rows)-SUM(sorted_rows) AS unsorted_rows
      FROM stv_tbl_perm
      GROUP BY db_id, id, name
    ) AS a
    INNER JOIN
    pg_class AS pgc
      ON pgc.oid = a.id
    INNER JOIN
    pg_namespace AS pgn
      ON pgn.oid = pgc.relnamespace
    INNER JOIN
    pg_database AS pgdb
      ON pgdb.oid = a.db_id
    LEFT OUTER JOIN
    (
      SELECT
        tbl
        ,COUNT(*) AS mbytes
      FROM stv_blocklist
      GROUP BY tbl
    ) AS b
      ON a.id=b.tbl
  ORDER BY 1,3,2;

v_view_dependency.sql

ビューの依存関係に関する情報を表示します。

まとめ

以上、管理系ビューに関する『amazon-redshift-utils』のご紹介でした。この辺りのビューを使えば更にRedshift業務が捗りますね!同時投稿の以下エントリでは『管理スクリプト(AdminScripts)』についても書いていますので併せてご覧頂けますと幸いです。こちらからは以上です。