Amazon Redshift: COPY時のエラー情報を見易い形で取得するSQL文

2016.09.13

この記事は公開されてから1年以上経過しています。情報が古い可能性がありますので、ご注意ください。

超々小ネタです。

Amazon RedshiftでCOPY操作を行う際、新しく取り込むようなファイルだとエラーとなるようなデータの形式であったり、テーブルデータ型との齟齬が頻繁に発生する事も往々にしてありますので都度エラーが発生した際に対象となるシステム系テーブルを参照する必要が出て来ます。その際、これまではあまり意識しては居なかったんですが『そう言えば都度、エラー情報を得る時に手動でSQLを書いてたな』とふと思い、また可変長文字列が多いテーブルでもありますのでそのままの情報を得ようとすると若干見辛いというのもあるのでその手間を省くべく確認用のSQLをネタとして用意しとこうと思いました。

stl_loar_errorsテーブル参照用SQL

以下はエラー発生時に参照すべきテーブル、『stl_loar_errors』テーブルを分割して表示させるSQL群です。対象となるテーブルのテーブル名が無かったのでstv_tbl_permテーブルから引っ張ってきてみました。

/** QUERY(1). */
SELECT
  TRIM(stl_load_errors.userid) AS userid,
  TRIM(stl_load_errors.slice) AS slice,
  TRIM(stl_load_errors.tbl) AS tbl,
  TRIM(table_name_list.tablename) AS target_table,
  TRIM(stl_load_errors.starttime) AS starttime,
  TRIM(stl_load_errors.session) AS session,
  TRIM(stl_load_errors.query) AS query
FROM
  stl_load_errors
    INNER JOIN (SELECT DISTINCT id AS table_id, name AS tablename FROM stv_tbl_perm) table_name_list
      ON stl_load_errors.tbl = table_name_list.table_id
ORDER BY
  starttime DESC
LIMIT 1;
 
/** QUERY(2). */
SELECT
  TRIM(starttime) AS starttime,
  TRIM(filename) AS filename,
  TRIM(line_number) AS line_number
FROM
  stl_load_errors
ORDER BY
  starttime DESC
LIMIT 1;
 
/** QUERY(3). */
SELECT
  TRIM(starttime) AS starttime,
  TRIM(raw_line) AS raw_line
FROM
  stl_load_errors
ORDER BY
  starttime DESC
LIMIT 1;
 
/** QUERY(4). */
SELECT
  TRIM(starttime) AS starttime,
  TRIM(colname) AS colname,
  TRIM(type) AS type,
  TRIM(col_length) AS col_length,
  TRIM(position) AS position,
  TRIM(raw_field_value) AS raw_field_value,
  TRIM(err_code) AS err_code,
  TRIM(err_reason) AS err_reason
FROM
 stl_load_errors
ORDER BY
 starttime DESC
LIMIT 1;

実行結果は以下の通りです。

# /** QUERY(1). */
# SELECT
#   TRIM(stl_load_errors.userid) AS userid,
#   TRIM(stl_load_errors.slice) AS slice,
#   TRIM(stl_load_errors.tbl) AS tbl,
#   TRIM(table_name_list.tablename) AS target_table,
#   TRIM(stl_load_errors.starttime) AS starttime,
#   TRIM(stl_load_errors.session) AS session,
#   TRIM(stl_load_errors.query) AS query
# FROM
#   stl_load_errors
#     INNER JOIN (SELECT DISTINCT id AS table_id, name AS tablename FROM stv_tbl_perm) table_name_list
#       ON stl_load_errors.tbl = table_name_list.table_id
# ORDER BY
#   starttime DESC
# LIMIT 1;
 userid | slice |  tbl   | target_table |      starttime      | session | query  
--------+-------+--------+--------------+---------------------+---------+--------
 100    | 1     | 542391 | customer     | 2016-09-12 12:34:56 | 12345   | 123456
(1 row)

# /** QUERY(2). */
# SELECT
#   TRIM(starttime) AS starttime,
#   TRIM(filename) AS filename,
#   TRIM(line_number) AS line_number
# FROM
#   stl_load_errors
# ORDER BY
#   starttime DESC
# LIMIT 1;
      starttime      |                                filename                                | line_number 
---------------------+------------------------------------------------------------------------+-------------
 2016-09-12 12:34:56 | s3://xxxxxxx-xxxx-xxxxxxxx/customer/single-files/customer-files.csv.gz | 2
(1 row)

# /** QUERY(3). */
# SELECT
#   TRIM(starttime) AS starttime,
#   TRIM(raw_line) AS raw_line
# FROM
#   stl_load_errors
# ORDER BY
#   starttime DESC
# LIMIT 1;
      starttime      |                                             raw_line                                             
---------------------+--------------------------------------------------------------------------------------------------
 2016-09-12 12:34:56 | 2,Customer#000000002,XSTf4,NCwDVaWNe6tE,JORDAN   1,JORDAN,MIDDLE EAST,23-768-687-3665,AUTOMOBILE
(1 row)

# /** QUERY(4). */
# SELECT
#   TRIM(starttime) AS starttime,
#   TRIM(colname) AS colname,
#   TRIM(type) AS type,
#   TRIM(col_length) AS col_length,
#   TRIM(position) AS position,
#   TRIM(raw_field_value) AS raw_field_value,
#   TRIM(err_code) AS err_code,
#   TRIM(err_reason) AS err_reason
# FROM
#  stl_load_errors
# ORDER BY
#  starttime DESC
# LIMIT 1;
      starttime      | colname | type | col_length | position | raw_field_value | err_code |      err_reason       
---------------------+---------+------+------------+----------+-----------------+----------+-----------------------
 2016-09-12 12:34:56 |         |      |            | 85       |                 | 1202     | Extra column(s) found
(1 row)

#

stl_loarerror_detailテーブル参照用SQL

STL_LOADERROR_DETAILテーブルはエラーに関する情報のより詳細な部分を確認する事が出来ます。こちらも可変長文字列の部分が幾つかありますので、TRIM関数を使って不要なスペースをカットする処理を施しています。

SELECT
  stl_loaderror_detail.userid,
  stl_loaderror_detail.slice,
  stl_loaderror_detail.session,
  TRIM(stl_loaderror_detail.query) AS query,
  TRIM(stl_loaderror_detail.filename) AS filename,
  stl_loaderror_detail.line_number,
  TRIM(stl_loaderror_detail.field) AS field,
  TRIM(stl_loaderror_detail.colname) AS colname,
  TRIM(stl_loaderror_detail.value) AS value,
  stl_loaderror_detail.is_null,
  stl_loaderror_detail.type,
  stl_loaderror_detail.col_length
FROM
  stl_loaderror_detail
WHERE
  session = <セッションID>
  AND query = <クエリID>;

実行結果は以下となります。

# SELECT
#   stl_loaderror_detail.userid,
#   stl_loaderror_detail.slice,
#   stl_loaderror_detail.session,
#   TRIM(stl_loaderror_detail.query) AS query,
#   TRIM(stl_loaderror_detail.filename) AS filename,
#   stl_loaderror_detail.line_number,
#   TRIM(stl_loaderror_detail.field) AS field,
#   TRIM(stl_loaderror_detail.colname) AS colname,
#   TRIM(stl_loaderror_detail.value) AS value,
#   stl_loaderror_detail.is_null,
#   stl_loaderror_detail.type,
#   stl_loaderror_detail.col_length
# FROM
#   stl_loaderror_detail
# WHERE
#   session = 12345
#   AND query = 123456;
 userid | slice | session | query  |                                filename                                | line_number | field |   colname    |       value        | is_null |    type    | col_length 
--------+-------+---------+--------+------------------------------------------------------------------------+-------------+-------+--------------+--------------------+---------+------------+------------
    100 |     1 |   12345 | 123456 | s3://xxxxxxx-xxxx-xxxxxxxx/customer/single-files/customer-files.csv.gz |           2 | 0     | c_custkey    | 2                  |       0 | int4       | 0         
    100 |     1 |   12345 | 123456 | s3://xxxxxxx-xxxx-xxxxxxxx/customer/single-files/customer-files.csv.gz |           2 | 1     | c_name       | Customer#000000002 |       0 | varchar    | 25        
    100 |     1 |   12345 | 123456 | s3://xxxxxxx-xxxx-xxxxxxxx/customer/single-files/customer-files.csv.gz |           2 | 2     | c_address    | XSTf4              |       0 | varchar    | 25        
    100 |     1 |   12345 | 123456 | s3://xxxxxxx-xxxx-xxxxxxxx/customer/single-files/customer-files.csv.gz |           2 | 3     | c_city       | NCwDVaWNe6tE       |       0 | varchar    | 25        
    100 |     1 |   12345 | 123456 | s3://xxxxxxx-xxxx-xxxxxxxx/customer/single-files/customer-files.csv.gz |           2 | 4     | c_nation     | JORDAN   1         |       0 | varchar    | 25        
    100 |     1 |   12345 | 123456 | s3://xxxxxxx-xxxx-xxxxxxxx/customer/single-files/customer-files.csv.gz |           2 | 5     | c_region     | JORDAN             |       0 | varchar    | 25        
    100 |     1 |   12345 | 123456 | s3://xxxxxxx-xxxx-xxxxxxxx/customer/single-files/customer-files.csv.gz |           2 | 6     | c_phone      | MIDDLE EAST        |       0 | varchar    | 20        
    100 |     1 |   12345 | 123456 | s3://xxxxxxx-xxxx-xxxxxxxx/customer/single-files/customer-files.csv.gz |           2 | 7     | c_mktsegment | 23-768-687-3665    |       0 | varchar    | 25        
(8 rows)

まとめ

Amazon RedshiftのCOPY処理に於けるロードエラー情報を解析する為のテーブル参照SQLに関するご紹介でした。作業の際にはこちらのSQLをコピペ可能な状態にしておいて問題特定を迅速に行えるようにしておきたいですね。こちらからは以上です。