Table of Contents

Performance monitoring (tools and techniques)

Connections

Currently active connections :

db2 "SELECT agent_state, event_state, event_object || ':' || event_type event,  COUNT(*) count FROM TABLE(WLM_GET_SERVICE_CLASS_AGENTS('', '',CAST(NULL AS BIGINT),-2)) AS agent,      TABLE(MON_GET_CONNECTION(cast(NULL as bigint), -2)) AS conn WHERE agent.application_handle = conn.application_handle GROUP BY agent_state, event_state, event_object, event_type ORDER BY 1,2,3"

Connections overview :

db2 "with conn as (select varchar(client_hostname,20) hostname,count(*) connections from TABLE(MON_GET_CONNECTION(cast(NULL as bigint), -2)) AS t group by client_hostname with ur) 
select * from conn union all select 'TOTAL_CONNECTIONS',sum(connections) from conn order by 2 with ur"

Memory

Memory pools (except PRIVATE) :

db2 "SELECT varchar(memory_set_type, 20) AS set_type,
       varchar(memory_pool_type,20) AS pool_type,
       varchar(db_name, 20) AS dbname,
       memory_pool_used,
       memory_pool_used_hwm
FROM TABLE( 
       MON_GET_MEMORY_POOL(NULL, CURRENT_SERVER, -2)) t where t.memory_set_type <> 'PRIVATE' with ur"

Locks

Show lock chain :

db2 "select substr(ai_h.primary_auth_id,1,10) as Holder_APP_ID, \
ai_h.agent_id as HOLDER_APP_HANDLE, substr(lw.authid,1,10) as Waiter_APP_ID, \
lw.agent_id as WAITER_APP_HANDLE, lw.lock_mode , lw.lock_object_type , \
substr(lw.tabname,1,10) as TabName,substr(lw.tabschema,1,10) as Schema, \
timestampdiff(2,char(lw.snapshot_timestamp-lw.lock_wait_start_time)) as waiting_s \
 from sysibmadm.lockwaits lw , sysibmadm.snapappl_info \
ai_h where lw.agent_id_holding_lk = ai_h.agent_id with ur" 

Autoconfigure

Show me the recommendations :

db2 AUTOCONFIGURE USING
MEM_PERCENT 60
WORKLOAD_TYPE MIXED
NUM_STMTS 500
ADMIN_PRIORITY BOTH
IS_POPULATED YES
NUM_LOCAL_APPS 0
NUM_REMOTE_APPS 20
ISOLATION RR
BP_RESIZEABLE YES
APPLY NONE

Snapshots

Lock waits :

db2 "select SNAPSHOT_TIMESTAMP, AGENT_ID, AGENT_ID_HOLDING_LK from icrica1p.snapshot_lockwait order by SNAPSHOT_TIMESTAMP"

Statements :

db2 "select SNAPSHOT_TIMESTAMP,AGENT_ID, PACKAGE_NAME from icrica1p.snapshot_statement where AGENT_ID = 106"

Snaphots provide the Key Performance Indicators. Instance level monitor switches must be turned on :

DFT_MON_BUFPOOL - collection of buffer pool monitoring data

Snapshots can be taken for various stuff :

db2 get snapshot for database on <DBNAME>

Monitoring

Memory monitoring :

db2 "SELECT varchar(memory_set_type, 20) AS set_type,varchar(memory_pool_type,20) AS pool_type, varchar(db_name, 20) AS dbname,memory_pool_used, memory_pool_used_hwm FROM TABLE(MON_GET_MEMORY_POOL(NULL,CURRENT_SERVER, -2))"

Identify "hot" table spaces

1. Total Physical I/Os

(Buffer pool data physical reads + Buffer pool index physical reads + Buffer pool xda physical reads + Buffer pool temporary data physicalreads) + (Direct reads * 512 ) / table space page size

db2 "select substr(TBSP_NAME,1,15), ((POOL_DATA_P_READS+POOL_INDEX_P_READS+POOL_XDA_P_READS+POOL_TEMP_DATA_P_READS)+(DIRECT_READS*512)/TBSP_PAGE_SIZE) PHYSICAL_IO from SYSIBMADM.SNAPTBSP where tbsp_name not like 'SYS%' and tbsp_name not like '%TEMP%'"

DB2 Design advisor / Explain / Access plans

DB2ANALYZE

#!/usr/bin/env ksh
. ~/.profile

DB=DBWCPROD
SCHEMA=DATAUSER

INPUT=$1
OUTPUT_EXPLAIN=`echo ${INPUT} | sed 's/sql/explain/g'`
OUTPUT_ADVIS=`echo ${INPUT} | sed 's/sql/advis/g'`

echo "Processing INPUT SQL ${INPUT} :"
db2 connect to ${DB} >/dev/null
db2 set current schema ${SCHEMA}
db2 set current explain mode explain
db2 -tvf ${INPUT} 

# explain
db2exfmt -d ${DB} -g -o ${OUTPUT_EXPLAIN} -w -1 -n % -s % -# 0

# advis
db2advis -d ${DB} -m IC -n ${SCHEMA} -q ${SCHEMA} -i ${INPUT} > ${OUTPUT_ADVIS}

db2 set current explain mode no
db2 terminate

Create the DB2 Design Advisor / Explain tables in current database :

db2 -tvf $INSTHOME/sqllib/misc/EXPLAIN.DDL

or

db2 "CALL SYSPROC.SYSINSTALLOBJECTS('EXPLAIN', 'C', CAST (NULL AS VARCHAR(128)), CAST (NULL AS VARCHAR(128)))"

Explain and Access plan :

db2 SET CURRENT EXPLAIN MODE EXPLAIN
db2 <sqlstmt>
db2 SET CURRENT EXPLAIN MODE NO

# or 

db2 "EXPLAIN ALL FOR <sqlstmt>"

Format the captured Explain information :

db2exfmt -d <DB> [ -u <userid> <pwd> ] -g -o expl_<STMT>.out -w -1 -n % -s % -# 0

Explain Static SQL (package) (also shows included SQL statements) :

db2expln -database <DB> -schema <SCHEMA> -package <PACKAGE> -output expl_<PACKAGE>.txt

DB2 Advisor :

db2advis -d ${DB} -n ${SCHEMA} -q ${SCHEMA} -i ${INPUT} > ${OUTPUT_ADVIS}

Index regression analysis based on proposed indexes :

db2 "update advise_index set use_index = 'N' where exists= 'N'"

db2 "update advise_index set use_index = 'Y' where name = '<INDEX>'"

db2 set current explain mode evaluate indexes
db2 -tvf <QUERY>

db2exfmt -d <DB> -1 -o <QUERY_OUTPUT>.txt

Query costs

Index read efficiency :

db2 "WITH SUM_TAB (SUM_RR) AS (
        SELECT FLOAT(SUM(ROWS_READ))
        FROM TABLE(MON_GET_PKG_CACHE_STMT ( 'D', NULL, NULL, -2)) AS T)
SELECT
        EXECUTABLE_ID,
        ROWS_READ,
        DECIMAL(100*(FLOAT(ROWS_READ)/SUM_TAB.SUM_RR),5,2) AS PCT_TOT_RR,
        ROWS_RETURNED,
        CASE
            WHEN ROWS_RETURNED > 0 THEN
                DECIMAL(FLOAT(ROWS_READ)/FLOAT(ROWS_RETURNED),10,2)
            ELSE -1
        END AS READ_EFFICIENCY,
        NUM_EXECUTIONS
    FROM TABLE(MON_GET_PKG_CACHE_STMT ( 'D', NULL, NULL, -2)) AS T, SUM_TAB
    ORDER BY ROWS_READ DESC FETCH FIRST 5 ROWS ONLY WITH UR"

Show me problematic SQLs ( > 9.7) from package cache :

db2 "WITH SUM_TAB (SUM_RR, SUM_CPU, SUM_EXEC, SUM_SORT, SUM_NUM_EXEC) AS (
        SELECT  FLOAT(SUM(ROWS_READ)),
                FLOAT(SUM(TOTAL_CPU_TIME)),
                FLOAT(SUM(STMT_EXEC_TIME)),
                FLOAT(SUM(TOTAL_SECTION_SORT_TIME)),
                FLOAT(SUM(NUM_EXECUTIONS))
            FROM TABLE(MON_GET_PKG_CACHE_STMT ( 'D', NULL, NULL, -2)) AS T
        )
SELECT
        EXECUTABLE_ID,
        ROWS_READ,
        DECIMAL(100*(FLOAT(ROWS_READ)/SUM_TAB.SUM_RR),5,2) AS PCT_TOT_RR,
        TOTAL_CPU_TIME,
        DECIMAL(100*(FLOAT(TOTAL_CPU_TIME)/SUM_TAB.SUM_CPU),5,2) AS PCT_TOT_CPU,
        STMT_EXEC_TIME,
        DECIMAL(100*(FLOAT(STMT_EXEC_TIME)/SUM_TAB.SUM_EXEC),5,2) AS PCT_TOT_EXEC,
        TOTAL_SECTION_SORT_TIME,
        DECIMAL(100*(FLOAT(TOTAL_SECTION_SORT_TIME)/SUM_TAB.SUM_SORT),5,2) AS PCT_TOT_SRT,
        NUM_EXECUTIONS,
        DECIMAL(100*(FLOAT(NUM_EXECUTIONS)/SUM_TAB.SUM_NUM_EXEC),5,2) AS PCT_TOT_EXEC,
        DECIMAL(FLOAT(STMT_EXEC_TIME)/FLOAT(NUM_EXECUTIONS),10,2) AS AVG_EXEC_TIME
    FROM TABLE(MON_GET_PKG_CACHE_STMT ( 'D', NULL, NULL, -2)) AS T, SUM_TAB
    WHERE DECIMAL(100*(FLOAT(ROWS_READ)/SUM_TAB.SUM_RR),5,2) > 10
        OR DECIMAL(100*(FLOAT(TOTAL_CPU_TIME)/SUM_TAB.SUM_CPU),5,2) >10
        OR DECIMAL(100*(FLOAT(STMT_EXEC_TIME)/SUM_TAB.SUM_EXEC),5,2) >10
        OR DECIMAL(100*(FLOAT(TOTAL_SECTION_SORT_TIME)/SUM_TAB.SUM_SORT),5,2) >10
        OR DECIMAL(100*(FLOAT(NUM_EXECUTIONS)/SUM_TAB.SUM_NUM_EXEC),5,2) >10
    ORDER BY ROWS_READ DESC FETCH FIRST 20 ROWS ONLY WITH UR"

Show me problematic SQLs from package cache ( < 9.7 ) :

WITH SUM_TAB (SUM_RR, SUM_CPU, SUM_EXEC, SUM_SORT, SUM_NUM_EXEC) AS (
        SELECT  FLOAT(SUM(ROWS_READ)),
                FLOAT(SUM(TOTAL_USR_CPU_TIME)) + FLOAT(SUM(TOTAL_SYS_CPU_TIME)),
                FLOAT(SUM(TOTAL_EXEC_TIME)),
                FLOAT(SUM(TOTAL_SORT_TIME)),
                FLOAT(SUM(NUM_EXECUTIONS))
            FROM SYSIBMADM.SNAPDYN_SQL AS T
        )
SELECT
        SUBSTR(STMT_TEXT,1,10) as STATEMENT,
        ROWS_READ,
        DECIMAL(100*(FLOAT(ROWS_READ)/SUM_TAB.SUM_RR),5,2) AS PCT_TOT_RR,
        TOTAL_USR_CPU_TIME + TOTAL_SYS_CPU_TIME as TOTAL_CPU_TIME,
        DECIMAL(100*((FLOAT(TOTAL_USR_CPU_TIME)+FLOAT(TOTAL_SYS_CPU_TIME))/SUM_TAB.SUM_CPU),5,2) AS PCT_TOT_CPU,
        TOTAL_EXEC_TIME,
        DECIMAL(100*(FLOAT(TOTAL_EXEC_TIME)/SUM_TAB.SUM_EXEC),5,2) AS PCT_TOT_EXEC,
        TOTAL_SORT_TIME,
        DECIMAL(100*(FLOAT(TOTAL_SORT_TIME)/SUM_TAB.SUM_SORT),5,2) AS PCT_TOT_SRT,
        NUM_EXECUTIONS,
        DECIMAL(100*(FLOAT(NUM_EXECUTIONS)/SUM_TAB.SUM_NUM_EXEC),5,2) AS PCT_TOT_EXEC,
        DECIMAL(FLOAT(TOTAL_EXEC_TIME)/FLOAT(NUM_EXECUTIONS),10,2) AS AVG_EXEC_TIME
    FROM SYSIBMADM.SNAPDYN_SQL AS T, SUM_TAB
    WHERE DECIMAL(100*(FLOAT(ROWS_READ)/SUM_TAB.SUM_RR),5,2) > 10
        OR DECIMAL(100*((FLOAT(TOTAL_USR_CPU_TIME)+FLOAT(TOTAL_SYS_CPU_TIME))/SUM_TAB.SUM_CPU),5,2)  >10
        OR DECIMAL(100*(FLOAT(TOTAL_EXEC_TIME)/SUM_TAB.SUM_EXEC),5,2) >10
        OR DECIMAL(100*(FLOAT(TOTAL_SORT_TIME)/SUM_TAB.SUM_SORT),5,2) >10
        OR DECIMAL(100*(FLOAT(NUM_EXECUTIONS)/SUM_TAB.SUM_NUM_EXEC),5,2) >10
    ORDER BY ROWS_READ DESC FETCH FIRST 20 ROWS ONLY WITH UR;

Get execution IDs by total statement exec time :

db2 "select executable_id,TOTAL_STMT_EXEC_TIME from sysibmadm.MON_PKG_CACHE_SUMMARY order by TOTAL_STMT_EXEC_TIME desc fetch first 10 rows only with ur"

Get statement from executable_id :

db2 -x "SELECT varchar(STMT_TEXT,3000) STMT FROM TABLE(MON_GET_PKG_CACHE_STMT(null, x'01000000000000007D0000000000000000000000020020081126172409987719', null, -2)) with ur"

DB2BATCH Query response time

db2batch -d <DB> -f <INPUT.SQL> -q on -o r 0

Optimization Classes & Profiles

Optimization Classes :

Automatic Maintenance

Automatic backups

Update automatic backup job :

CALL SYSPROC.AUTOMAINT_SET_POLICY ( 'AUTO_BACKUP', BLOB('<?xml version="1.0" encoding="UTF-8"?><DB2AutoBackupPolicy xmlns="http://www.ibm.com/xmlns/prod/db2/autonomic/config"><BackupOptions mode="Offline"><BackupTarget><DiskBackupTarget><PathName>/db2/db2inst1/home</PathName></DiskBackupTarget></BackupTarget></BackupOptions><BackupCriteria numberOfFullBackups="1" timeSinceLastBackup="24" logSpaceConsumedSinceLastBackup="6400"/></DB2AutoBackupPolicy> ') );

Event monitors

Check event monitors :

db2 "select varchar(evmonname,30) as evmonname, target_type,EVENT_MON_STATE(evmonname) as state,autostart  from syscat.eventmonitors with ur"

Create new event monitor :

db2 "CREATE EVENT MONITOR <MON> FOR <EVENT> WRITE TO TABLE"

Connection Event monitor with filter :

db2 "create event monitor ld_conn for connections where appl_name <> 'KUDDB2' write to table"

Enable/Disable Event monitor :

db2 set event monitor DB2DETAILDEADLOCK state 0

Create & Activate the LD_CONN event monitor :

for db in `db2 list db directory | grep -p Indirect | grep alias | awk '{print $NF}'`; do db2 connect to ${db} >/dev/null;db2 "create event monitor ld_conn for connections where appl_name <> 'KUDDB2' write to table";db2 set event monitor LD_CONN state 1;db2 "select varchar(current schema,9) instance, varchar(current server, 9) database, substr(evmonname,1,30) as evmonname, EVENT_MON_STATE(evmonname) as state from syscat.eventmonitors with ur";db2 terminate >/dev/null; done

Check status of Event monitors in multiple databases :

for db in `db2 list db directory | grep -p Indirect | grep alias | awk '{print $NF}'`; do db2 connect to ${db};db2 "select substr(evmonname,1,30) as evmonname, EVENT_MON_STATE(evmonname) as state from syscat.eventmonitors with ur"; done

Export from Connection event monitor :

db2 "export to aixreprod01_connections.csv of del select substr(AUTH_ID,1,20) USER, substr(appl_name,1,20) APPLICATION, char(CLIENT_NNAME,30) HOSTNAME,conn_time TIME from CONNHEADER_LD_CONN order by conn_time with ur"

Activity monitor (with filtering) :

 CREATE EVENT MONITOR DBAACTIVITIES
       FOR ACTIVITIES
       WRITE TO TABLE
       ACTIVITY (TABLE ACTIVITY_DBAACTIVITIES
                 IN TS32K_DBA
                 PCTDEACTIVATE 100),
       ACTIVITYMETRICS (TABLE ACTIVITYMETRICS_DBAACTIVITIES
                        IN TS32K_DBA
                        PCTDEACTIVATE 100),
       ACTIVITYSTMT (TABLE ACTIVITYSTMT_DBAACTIVITIES
                     IN TS32K_DBA
                     PCTDEACTIVATE 100),
       ACTIVITYVALS (TABLE ACTIVITYVALS_DBAACTIVITIES
                     IN TS32K_DBA
                     PCTDEACTIVATE 100),
       CONTROL (TABLE CONTROL_DBAACTIVITIES
                IN TS32K_DBA
                PCTDEACTIVATE 100)
       MANUALSTART@
       
       create table dba_metrics.config (attr varchar(100),value varchar(100),enabled char(1)) in ts32k_dba@
       
       create unique index dba_metrics.I_CONFIG_U01 on dba_metrics.config (attr,value) include (enabled)@
       
       CREATE OR REPLACE TRIGGER DBA_METRICS.T_CONNECTION_FILTER AFTER INSERT ON DBA_METRICS.MON_CONNECTIONS_DETAILS
REFERENCING NEW AS N
FOR EACH ROW MODE DB2SQL
WHEN (N.HOSTNAME IN (SELECT VALUE FROM DBA_METRICS.CONFIG WHERE ATTR = 'filter_activities' AND ENABLED='1'))
BEGIN ATOMIC
	CALL SYSPROC.WLM_SET_CONN_ENV(N.APPLICATION_HANDLE,'<collectactdata>WITH DETAILS, SECTION</collectactdata>'); 
END@

CREATE OR REPLACE TRIGGER dba_metrics.T_CONFIG_DISABLE_FILTER AFTER UPDATE ON DBA_METRICS.CONFIG
REFERENCING OLD AS O NEW AS N 
FOR EACH ROW MODE DB2SQL
WHEN (N.ENABLED = '0')
BEGIN ATOMIC
	DECLARE app_handle BIGINT;

	FOR v AS
		select distinct application_handle FROM TABLE(MON_GET_CONNECTION(cast(NULL as bigint), -2)) AS t where t.CLIENT_HOSTNAME = o.value and o.attr = 'filter_activities'
	DO 
		set app_handle = v.application_handle;
		CALL SYSPROC.WLM_SET_CONN_ENV(app_handle,'<collectactdata>NONE</collectactdata>');
	END FOR;
END@

Sorting

Sort utilization :

db2 "SELECT APPLICATION_HANDLE,
sum(TOTAL_SORTS) as SUM_TOTAL_SORTS,
sum(TOTAL_SECTION_SORTS) as SUM_TOTAL_SECTION_SORTS,
sum(SORT_OVERFLOWS) as SUM_SORT_OVERFLOWS,
sum(POST_THRESHOLD_SORTS) as SUM_POST_THRESHOLD_SORTS,
decimal(avg(TOTAL_SECTION_PROC_TIME)/1000.0,8,2) as AVG_TOTAL_SECTION_PROC_TIME_SEC,
decimal(avg(TOTAL_SECTION_SORT_PROC_TIME)/1000.0,8,2) as
AVG_TOTAL_SECTION_SORT_PROC_TIME_SEC,
sum(TOTAL_HASH_JOINS) as SUM_TOTAL_HASH_JOINS,
sum(TOTAL_HASH_LOOPS) as SUM_TOTAL_HASH_LOOPS,
sum(HASH_JOIN_OVERFLOWS) as SUM_HASH_JOIN_OVERFLOWS,
sum(ROWS_MODIFIED) as SUM_ROWS_MODIFIED,
sum(POOL_DATA_WRITES) as SUM_POOL_DATA_WRITES
FROM TABLE(MON_GET_CONNECTION(cast(NULL as bigint), -2)) AS t
GROUP BY APPLICATION_HANDLE with ur"

Read ratios :

db2 "WITH READ_METRICS as (
SELECT APPLICATION_HANDLE,
sum(ROWS_READ) as ROWS_READ,
sum(POOL_DATA_L_READS) as POOL_DATA_L_READS,
sum(POOL_INDEX_L_READS) as POOL_INDEX_L_READS,
sum(POOL_TEMP_DATA_L_READS+POOL_TEMP_INDEX_L_READS) as POOL_TEMP_L_READS
FROM TABLE(MON_GET_CONNECTION(cast(NULL as bigint), -2)) AS m
group by application_handle
)
select r.APPLICATION_HANDLE, r.ROWS_READ,
case when POOL_DATA_L_READS+POOL_TEMP_L_READS > 0 then
decimal(r.ROWS_READ*1.00/(POOL_DATA_L_READS+POOL_TEMP_L_READS), 8,2) end as
ROWS_READ_PER_POOL_L_READ,
POOL_DATA_L_READS, POOL_INDEX_L_READS, POOL_TEMP_L_READS,
varchar(STMT_TEXT,100) as STMT_TEXT
from READ_METRICS r left outer join SYSIBMADM.MON_CURRENT_SQL s
ON r.APPLICATION_HANDLE = s.APPLICATION_HANDLE
order by pool_data_l_reads desc with ur"

Bufferpools

BP read efficiency :

db2 "SELECT SUBSTR(BP_NAME, 1, 15) AS BP_NAME, TOTAL_PHYSICAL_READS, AVERAGE_READ_TIME_MS,TOTAL_ASYNC_READS,TOTAL_SYNC_READS,PERCENT_SYNC_READS,ASYNC_NOT_READ_PERCENT FROM SYSIBMADM.BP_READ_IO with ur"

Page cleaning (pS Castout) rates :

db2 "select
current timestamp as Time,
case when sum(w.TOTAL_APP_COMMITS) < 100 then null else
cast( float(sum(b.POOL_DATA_WRITES+b.POOL_INDEX_WRITES))
/ sum(w.TOTAL_APP_COMMITS) as decimal(6,1)) end
as \"BP wrt / UOW\",
case when sum(b.POOL_DATA_WRITES+b.POOL_INDEX_WRITES) < 1000 then null else
cast( float(sum(b.POOL_WRITE_TIME))
/ sum(b.POOL_DATA_WRITES+b.POOL_INDEX_WRITES) as decimal(5,1)) end
as \"ms / BP wrt\"
from table(mon_get_workload(null,null)) as w,
table(mon_get_bufferpool(null,null)) as b"

TPC-DS Benchmark

Generate SQLs :

dsqgen -input ../query_templates/templates.lst -directory ../query_templates -dialect db2 -scale 10 -verbose

Generate DATA :

dsdgen -SCALE 10 -dir ~/DATA -verbose

Disk speed :

#!/bin/bash
 
myfile=$1
 
echo "Creating test file $myfile using direct I/O"
dd if=/dev/zero of=$myfile bs=1024M count=12 oflag=direct
 
sync;sync;sync;echo 3 > /proc/sys/vm/drop_caches
 
echo "Single Direct I/O writer"
( dd if=/dev/zero of=$myfile bs=64K count=196608 conv=notrunc oflag=direct > thread1.out 2>&1 ) &
 
wait
cat thread1.out
 
echo "Two Direct I/O writer"
( dd if=/dev/zero of=$myfile bs=64K count=98304 conv=notrunc oflag=direct > thread1.out 2>&1 ) &
( dd if=/dev/zero of=$myfile bs=64K count=98304 seek=98304 conv=notrunc oflag=direct > thread2.out 2>&1 ) &
 
wait
cat thread1.out thread2.out