====== Performance monitoring (tools and techniques) ======
===== Connections =====
**Currently active connections :**
db2 "SELECT agent_state, event_state, event_object || ':' || event_type event, COUNT(*) count FROM TABLE(WLM_GET_SERVICE_CLASS_AGENTS('', '',CAST(NULL AS BIGINT),-2)) AS agent, TABLE(MON_GET_CONNECTION(cast(NULL as bigint), -2)) AS conn WHERE agent.application_handle = conn.application_handle GROUP BY agent_state, event_state, event_object, event_type ORDER BY 1,2,3"
**Connections overview :**
db2 "with conn as (select varchar(client_hostname,20) hostname,count(*) connections from TABLE(MON_GET_CONNECTION(cast(NULL as bigint), -2)) AS t group by client_hostname with ur)
select * from conn union all select 'TOTAL_CONNECTIONS',sum(connections) from conn order by 2 with ur"
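**Quick CLP alternative (less detail, but no SQL needed) :**
db2 list applications show detail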
===== Memory =====
**Memory pools (except PRIVATE) :**
db2 "SELECT varchar(memory_set_type, 20) AS set_type,
varchar(memory_pool_type,20) AS pool_type,
varchar(db_name, 20) AS dbname,
memory_pool_used,
memory_pool_used_hwm
FROM TABLE(
MON_GET_MEMORY_POOL(NULL, CURRENT_SERVER, -2)) t where t.memory_set_type <> 'PRIVATE' with ur"
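**Memory sets overview (a sketch using MON_GET_MEMORY_SET; totals per memory set instead of per pool) :**
db2 "SELECT varchar(memory_set_type, 20) AS set_type,
varchar(db_name, 20) AS dbname,
memory_set_used,
memory_set_committed
FROM TABLE(MON_GET_MEMORY_SET(NULL, NULL, -2)) with ur"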
===== Locks =====
**Show lock chain :**
db2 "select substr(ai_h.primary_auth_id,1,10) as Holder_APP_ID, \
ai_h.agent_id as HOLDER_APP_HANDLE, substr(lw.authid,1,10) as Waiter_APP_ID, \
lw.agent_id as WAITER_APP_HANDLE, lw.lock_mode , lw.lock_object_type , \
substr(lw.tabname,1,10) as TabName,substr(lw.tabschema,1,10) as Schema, \
timestampdiff(2,char(lw.snapshot_timestamp-lw.lock_wait_start_time)) as waiting_s \
from sysibmadm.lockwaits lw , sysibmadm.snapappl_info \
ai_h where lw.agent_id_holding_lk = ai_h.agent_id with ur"
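**Quick db2pd alternative (a sketch; <dbname> is a placeholder) :**
db2pd -db <dbname> -wlocks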
===== Autoconfigure =====
**Show me the recommendations :**
db2 AUTOCONFIGURE USING
MEM_PERCENT 60
WORKLOAD_TYPE MIXED
NUM_STMTS 500
ADMIN_PRIORITY BOTH
IS_POPULATED YES
NUM_LOCAL_APPS 0
NUM_REMOTE_APPS 20
ISOLATION RR
BP_RESIZEABLE YES
APPLY NONE
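APPLY NONE only displays the recommendations; re-run with APPLY DB ONLY or APPLY DB AND DBM to actually change the configuration, for example :
db2 AUTOCONFIGURE USING MEM_PERCENT 60 WORKLOAD_TYPE MIXED APPLY DB AND DBM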
===== Snapshots =====
**Lock waits :**
db2 "select SNAPSHOT_TIMESTAMP, AGENT_ID, AGENT_ID_HOLDING_LK from icrica1p.snapshot_lockwait order by SNAPSHOT_TIMESTAMP"
**Statements :**
db2 "select SNAPSHOT_TIMESTAMP,AGENT_ID, PACKAGE_NAME from icrica1p.snapshot_statement where AGENT_ID = 106"
Snapshots provide the key performance indicators. The instance-level monitor switches must be turned on first, e.g. :
DFT_MON_BUFPOOL - collection of buffer pool monitoring data
Snapshots can be taken at various scopes, for example :
db2 get snapshot for database on <dbname>
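**Turn the default monitor switches on at instance level :**
db2 update dbm cfg using DFT_MON_BUFPOOL on DFT_MON_LOCK on DFT_MON_SORT on DFT_MON_STMT on DFT_MON_TABLE on DFT_MON_UOW on DFT_MON_TIMESTAMP on
**Other snapshot scopes (examples; <dbname> is a placeholder) :**
db2 get snapshot for dbm
db2 get snapshot for applications on <dbname>
db2 get snapshot for bufferpools on <dbname>
db2 get snapshot for locks on <dbname>
db2 get snapshot for dynamic sql on <dbname>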
===== Monitoring =====
**Memory monitoring :**
db2 "SELECT varchar(memory_set_type, 20) AS set_type,varchar(memory_pool_type,20) AS pool_type, varchar(db_name, 20) AS dbname,memory_pool_used, memory_pool_used_hwm FROM TABLE(MON_GET_MEMORY_POOL(NULL,CURRENT_SERVER, -2))"
=== Identify "hot" table spaces ===
**1. Total Physical I/Os**
(Buffer pool data physical reads + Buffer pool index physical reads + Buffer pool xda physical reads + Buffer pool temporary data physical reads) + (Direct reads * 512) / table space page size -- direct reads are counted in 512-byte units, hence the conversion to pages
db2 "select substr(TBSP_NAME,1,15), ((POOL_DATA_P_READS+POOL_INDEX_P_READS+POOL_XDA_P_READS+POOL_TEMP_DATA_P_READS)+(DIRECT_READS*512)/TBSP_PAGE_SIZE) PHYSICAL_IO from SYSIBMADM.SNAPTBSP where tbsp_name not like 'SYS%' and tbsp_name not like '%TEMP%'"
===== DB2 Design advisor / Explain / Access plans =====
**DB2ANALYZE (explain + design advisor wrapper script) :**
#!/usr/bin/env ksh
. ~/.profile
DB=DBWCPROD
SCHEMA=DATAUSER
INPUT=$1
OUTPUT_EXPLAIN=`echo ${INPUT} | sed 's/sql/explain/g'`
OUTPUT_ADVIS=`echo ${INPUT} | sed 's/sql/advis/g'`
echo "Processing INPUT SQL ${INPUT} :"
db2 connect to ${DB} >/dev/null
db2 set current schema ${SCHEMA}
db2 set current explain mode explain
db2 -tvf ${INPUT}
# explain
db2exfmt -d ${DB} -g -o ${OUTPUT_EXPLAIN} -w -1 -n % -s % -# 0
# advis
db2advis -d ${DB} -m IC -n ${SCHEMA} -q ${SCHEMA} -i ${INPUT} > ${OUTPUT_ADVIS}
db2 set current explain mode no
db2 terminate
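Usage example (the script file name is arbitrary; output names follow the sed substitution above) :
./db2analyze.ksh my_query.sql    # writes my_query.explain and my_query.advis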
**Create the DB2 Design Advisor / Explain tables in current database :**
db2 -tvf $INSTHOME/sqllib/misc/EXPLAIN.DDL
or
db2 "CALL SYSPROC.SYSINSTALLOBJECTS('EXPLAIN', 'C', CAST (NULL AS VARCHAR(128)), CAST (NULL AS VARCHAR(128)))"
**Explain and Access plan :**
db2 SET CURRENT EXPLAIN MODE EXPLAIN
db2 "<SQL statement>"
db2 SET CURRENT EXPLAIN MODE NO
# or
db2 "EXPLAIN ALL FOR "
**Format the captured Explain information :**
db2exfmt -d <dbname> [ -u <user> <password> ] -g -o expl_<name>.out -w -1 -n % -s % -# 0
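**End-to-end example (a sketch; database name and output file are placeholders) :**
db2 connect to <dbname>
db2 SET CURRENT EXPLAIN MODE EXPLAIN
db2 "SELECT COUNT(*) FROM SYSCAT.TABLES"
db2 SET CURRENT EXPLAIN MODE NO
db2exfmt -d <dbname> -g -o expl_syscat.out -w -1 -n % -s % -# 0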
**Explain Static SQL (package) (also shows included SQL statements) :**
db2expln -database <dbname> -schema <schema> -package <package> -output expl_<package>.txt
**DB2 Advisor :**
db2advis -d ${DB} -n ${SCHEMA} -q ${SCHEMA} -i ${INPUT} > ${OUTPUT_ADVIS}
**Index regression analysis based on proposed indexes :**
* 1) Disable explain evaluation of all proposed (non-existent) indexes :
db2 "update advise_index set use_index = 'N' where exists= 'N'"
* 2) Enable only a specific index from the advisor output :
db2 "update advise_index set use_index = 'Y' where name = '<index_name>'"
* 3) Set explain to evaluate indexes mode :
db2 set current explain mode evaluate indexes
db2 -tvf <sql_file>
* 4) Export the new explain plan :
db2exfmt -d <dbname> -1 -o <output>.txt
===== Query costs =====
**Index read efficiency :**
db2 "WITH SUM_TAB (SUM_RR) AS (
SELECT FLOAT(SUM(ROWS_READ))
FROM TABLE(MON_GET_PKG_CACHE_STMT ( 'D', NULL, NULL, -2)) AS T)
SELECT
EXECUTABLE_ID,
ROWS_READ,
DECIMAL(100*(FLOAT(ROWS_READ)/SUM_TAB.SUM_RR),5,2) AS PCT_TOT_RR,
ROWS_RETURNED,
CASE
WHEN ROWS_RETURNED > 0 THEN
DECIMAL(FLOAT(ROWS_READ)/FLOAT(ROWS_RETURNED),10,2)
ELSE -1
END AS READ_EFFICIENCY,
NUM_EXECUTIONS
FROM TABLE(MON_GET_PKG_CACHE_STMT ( 'D', NULL, NULL, -2)) AS T, SUM_TAB
ORDER BY ROWS_READ DESC FETCH FIRST 5 ROWS ONLY WITH UR"
**Show me problematic SQLs from package cache (DB2 9.7 and later) :**
db2 "WITH SUM_TAB (SUM_RR, SUM_CPU, SUM_EXEC, SUM_SORT, SUM_NUM_EXEC) AS (
SELECT FLOAT(SUM(ROWS_READ)),
FLOAT(SUM(TOTAL_CPU_TIME)),
FLOAT(SUM(STMT_EXEC_TIME)),
FLOAT(SUM(TOTAL_SECTION_SORT_TIME)),
FLOAT(SUM(NUM_EXECUTIONS))
FROM TABLE(MON_GET_PKG_CACHE_STMT ( 'D', NULL, NULL, -2)) AS T
)
SELECT
EXECUTABLE_ID,
ROWS_READ,
DECIMAL(100*(FLOAT(ROWS_READ)/SUM_TAB.SUM_RR),5,2) AS PCT_TOT_RR,
TOTAL_CPU_TIME,
DECIMAL(100*(FLOAT(TOTAL_CPU_TIME)/SUM_TAB.SUM_CPU),5,2) AS PCT_TOT_CPU,
STMT_EXEC_TIME,
DECIMAL(100*(FLOAT(STMT_EXEC_TIME)/SUM_TAB.SUM_EXEC),5,2) AS PCT_TOT_EXEC,
TOTAL_SECTION_SORT_TIME,
DECIMAL(100*(FLOAT(TOTAL_SECTION_SORT_TIME)/SUM_TAB.SUM_SORT),5,2) AS PCT_TOT_SRT,
NUM_EXECUTIONS,
DECIMAL(100*(FLOAT(NUM_EXECUTIONS)/SUM_TAB.SUM_NUM_EXEC),5,2) AS PCT_TOT_NUM_EXEC,
DECIMAL(FLOAT(STMT_EXEC_TIME)/FLOAT(NUM_EXECUTIONS),10,2) AS AVG_EXEC_TIME
FROM TABLE(MON_GET_PKG_CACHE_STMT ( 'D', NULL, NULL, -2)) AS T, SUM_TAB
WHERE DECIMAL(100*(FLOAT(ROWS_READ)/SUM_TAB.SUM_RR),5,2) > 10
OR DECIMAL(100*(FLOAT(TOTAL_CPU_TIME)/SUM_TAB.SUM_CPU),5,2) >10
OR DECIMAL(100*(FLOAT(STMT_EXEC_TIME)/SUM_TAB.SUM_EXEC),5,2) >10
OR DECIMAL(100*(FLOAT(TOTAL_SECTION_SORT_TIME)/SUM_TAB.SUM_SORT),5,2) >10
OR DECIMAL(100*(FLOAT(NUM_EXECUTIONS)/SUM_TAB.SUM_NUM_EXEC),5,2) >10
ORDER BY ROWS_READ DESC FETCH FIRST 20 ROWS ONLY WITH UR"
**Show me problematic SQLs from package cache (before DB2 9.7) :**
WITH SUM_TAB (SUM_RR, SUM_CPU, SUM_EXEC, SUM_SORT, SUM_NUM_EXEC) AS (
SELECT FLOAT(SUM(ROWS_READ)),
FLOAT(SUM(TOTAL_USR_CPU_TIME)) + FLOAT(SUM(TOTAL_SYS_CPU_TIME)),
FLOAT(SUM(TOTAL_EXEC_TIME)),
FLOAT(SUM(TOTAL_SORT_TIME)),
FLOAT(SUM(NUM_EXECUTIONS))
FROM SYSIBMADM.SNAPDYN_SQL AS T
)
SELECT
SUBSTR(STMT_TEXT,1,10) as STATEMENT,
ROWS_READ,
DECIMAL(100*(FLOAT(ROWS_READ)/SUM_TAB.SUM_RR),5,2) AS PCT_TOT_RR,
TOTAL_USR_CPU_TIME + TOTAL_SYS_CPU_TIME as TOTAL_CPU_TIME,
DECIMAL(100*((FLOAT(TOTAL_USR_CPU_TIME)+FLOAT(TOTAL_SYS_CPU_TIME))/SUM_TAB.SUM_CPU),5,2) AS PCT_TOT_CPU,
TOTAL_EXEC_TIME,
DECIMAL(100*(FLOAT(TOTAL_EXEC_TIME)/SUM_TAB.SUM_EXEC),5,2) AS PCT_TOT_EXEC,
TOTAL_SORT_TIME,
DECIMAL(100*(FLOAT(TOTAL_SORT_TIME)/SUM_TAB.SUM_SORT),5,2) AS PCT_TOT_SRT,
NUM_EXECUTIONS,
DECIMAL(100*(FLOAT(NUM_EXECUTIONS)/SUM_TAB.SUM_NUM_EXEC),5,2) AS PCT_TOT_NUM_EXEC,
DECIMAL(FLOAT(TOTAL_EXEC_TIME)/FLOAT(NUM_EXECUTIONS),10,2) AS AVG_EXEC_TIME
FROM SYSIBMADM.SNAPDYN_SQL AS T, SUM_TAB
WHERE DECIMAL(100*(FLOAT(ROWS_READ)/SUM_TAB.SUM_RR),5,2) > 10
OR DECIMAL(100*((FLOAT(TOTAL_USR_CPU_TIME)+FLOAT(TOTAL_SYS_CPU_TIME))/SUM_TAB.SUM_CPU),5,2) >10
OR DECIMAL(100*(FLOAT(TOTAL_EXEC_TIME)/SUM_TAB.SUM_EXEC),5,2) >10
OR DECIMAL(100*(FLOAT(TOTAL_SORT_TIME)/SUM_TAB.SUM_SORT),5,2) >10
OR DECIMAL(100*(FLOAT(NUM_EXECUTIONS)/SUM_TAB.SUM_NUM_EXEC),5,2) >10
ORDER BY ROWS_READ DESC FETCH FIRST 20 ROWS ONLY WITH UR;
**Get execution IDs by total statement exec time :**
db2 "select executable_id,TOTAL_STMT_EXEC_TIME from sysibmadm.MON_PKG_CACHE_SUMMARY order by TOTAL_STMT_EXEC_TIME desc fetch first 10 rows only with ur"
**Get statement from executable_id :**
db2 -x "SELECT varchar(STMT_TEXT,3000) STMT FROM TABLE(MON_GET_PKG_CACHE_STMT(null, x'01000000000000007D0000000000000000000000020020081126172409987719', null, -2)) with ur"
**DB2BATCH query response time :**
db2batch -d <dbname> -f <sql_file> -q on -o r 0
===== Optimization Classes & Profiles =====
**Optimization Classes :**
* 0 - NLJoin (no indexes) - minimal optimization
* 1 - Low optimization
* 2 - Greedy Joins (use it for simple joins)
* 3 - Moderate optimization
* 5 - Default class, reasonable for mixed workloads
* 7 - Like 5, but optimization is not reduced for complex queries
* 9 - A lot of thinking (never use it in production)
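**Set the optimization class (session level or database default) :**
db2 "SET CURRENT QUERY OPTIMIZATION = 2"
db2 update db cfg for <dbname> using DFT_QUERYOPT 5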
===== Automatic Maintenance =====
==== Automatic backups ====
**Update automatic backup job (note : the second argument must be the policy XML document passed as a BLOB, not a path; the backup target path belongs inside that document) :**
CALL SYSPROC.AUTOMAINT_SET_POLICY ( 'AUTO_BACKUP', BLOB('<policy XML document>') );
or use a policy file placed in sqllib/tmp :
CALL SYSPROC.AUTOMAINT_SET_POLICYFILE ( 'AUTO_BACKUP', '<policy file name>.xml' );
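**Enable the automatic backup feature and fetch the current policy (a sketch; the retrieved file name is an example and lands in sqllib/tmp) :**
db2 update db cfg for <dbname> using AUTO_MAINT ON AUTO_DB_BACKUP ON
db2 "CALL SYSPROC.AUTOMAINT_GET_POLICYFILE('AUTO_BACKUP', 'cur_auto_backup_policy.xml')"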
===== Event monitors =====
**Check event monitors :**
db2 "select varchar(evmonname,30) as evmonname, target_type,EVENT_MON_STATE(evmonname) as state,autostart from syscat.eventmonitors with ur"
**Create new event monitor :**
db2 "CREATE EVENT MONITOR FOR WRITE TO TABLE"
**Connection Event monitor with filter :**
db2 "create event monitor ld_conn for connections where appl_name <> 'KUDDB2' write to table"
**Enable/Disable Event monitor :**
db2 set event monitor DB2DETAILDEADLOCK state 0
**Create & Activate the LD_CONN event monitor :**
for db in `db2 list db directory | grep -p Indirect | grep alias | awk '{print $NF}'`; do db2 connect to ${db} >/dev/null;db2 "create event monitor ld_conn for connections where appl_name <> 'KUDDB2' write to table";db2 set event monitor LD_CONN state 1;db2 "select varchar(current schema,9) instance, varchar(current server, 9) database, substr(evmonname,1,30) as evmonname, EVENT_MON_STATE(evmonname) as state from syscat.eventmonitors with ur";db2 terminate >/dev/null; done
**Check status of Event monitors in multiple databases :**
for db in `db2 list db directory | grep -p Indirect | grep alias | awk '{print $NF}'`; do db2 connect to ${db};db2 "select substr(evmonname,1,30) as evmonname, EVENT_MON_STATE(evmonname) as state from syscat.eventmonitors with ur"; done
**Export from Connection event monitor :**
db2 "export to aixreprod01_connections.csv of del select substr(AUTH_ID,1,20) USER, substr(appl_name,1,20) APPLICATION, char(CLIENT_NNAME,30) HOSTNAME,conn_time TIME from CONNHEADER_LD_CONN order by conn_time with ur"
**Activity monitor (with filtering) :**
CREATE EVENT MONITOR DBAACTIVITIES
FOR ACTIVITIES
WRITE TO TABLE
ACTIVITY (TABLE ACTIVITY_DBAACTIVITIES
IN TS32K_DBA
PCTDEACTIVATE 100),
ACTIVITYMETRICS (TABLE ACTIVITYMETRICS_DBAACTIVITIES
IN TS32K_DBA
PCTDEACTIVATE 100),
ACTIVITYSTMT (TABLE ACTIVITYSTMT_DBAACTIVITIES
IN TS32K_DBA
PCTDEACTIVATE 100),
ACTIVITYVALS (TABLE ACTIVITYVALS_DBAACTIVITIES
IN TS32K_DBA
PCTDEACTIVATE 100),
CONTROL (TABLE CONTROL_DBAACTIVITIES
IN TS32K_DBA
PCTDEACTIVATE 100)
MANUALSTART@
create table dba_metrics.config (attr varchar(100),value varchar(100),enabled char(1)) in ts32k_dba@
create unique index dba_metrics.I_CONFIG_U01 on dba_metrics.config (attr,value) include (enabled)@
CREATE OR REPLACE TRIGGER DBA_METRICS.T_CONNECTION_FILTER AFTER INSERT ON DBA_METRICS.MON_CONNECTIONS_DETAILS
REFERENCING NEW AS N
FOR EACH ROW MODE DB2SQL
WHEN (N.HOSTNAME IN (SELECT VALUE FROM DBA_METRICS.CONFIG WHERE ATTR = 'filter_activities' AND ENABLED='1'))
BEGIN ATOMIC
CALL SYSPROC.WLM_SET_CONN_ENV(N.APPLICATION_HANDLE,'WITH DETAILS, SECTION');
END@
CREATE OR REPLACE TRIGGER dba_metrics.T_CONFIG_DISABLE_FILTER AFTER UPDATE ON DBA_METRICS.CONFIG
REFERENCING OLD AS O NEW AS N
FOR EACH ROW MODE DB2SQL
WHEN (N.ENABLED = '0')
BEGIN ATOMIC
DECLARE app_handle BIGINT;
FOR v AS
select distinct application_handle FROM TABLE(MON_GET_CONNECTION(cast(NULL as bigint), -2)) AS t where t.CLIENT_HOSTNAME = o.value and o.attr = 'filter_activities'
DO
set app_handle = v.application_handle;
CALL SYSPROC.WLM_SET_CONN_ENV(app_handle,'NONE');
END FOR;
END@
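**Using the filter (a sketch : assumes DBA_METRICS.MON_CONNECTIONS_DETAILS is fed by a separate collection job; 'appsrv01' is an example hostname) :**
db2 "insert into dba_metrics.config (attr, value, enabled) values ('filter_activities', 'appsrv01', '1')"
db2 set event monitor DBAACTIVITIES state 1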
===== Sorting =====
**Sort utilization :**
db2 "SELECT APPLICATION_HANDLE,
sum(TOTAL_SORTS) as SUM_TOTAL_SORTS,
sum(TOTAL_SECTION_SORTS) as SUM_TOTAL_SECTION_SORTS,
sum(SORT_OVERFLOWS) as SUM_SORT_OVERFLOWS,
sum(POST_THRESHOLD_SORTS) as SUM_POST_THRESHOLD_SORTS,
decimal(avg(TOTAL_SECTION_PROC_TIME)/1000.0,8,2) as AVG_TOTAL_SECTION_PROC_TIME_SEC,
decimal(avg(TOTAL_SECTION_SORT_PROC_TIME)/1000.0,8,2) as
AVG_TOTAL_SECTION_SORT_PROC_TIME_SEC,
sum(TOTAL_HASH_JOINS) as SUM_TOTAL_HASH_JOINS,
sum(TOTAL_HASH_LOOPS) as SUM_TOTAL_HASH_LOOPS,
sum(HASH_JOIN_OVERFLOWS) as SUM_HASH_JOIN_OVERFLOWS,
sum(ROWS_MODIFIED) as SUM_ROWS_MODIFIED,
sum(POOL_DATA_WRITES) as SUM_POOL_DATA_WRITES
FROM TABLE(MON_GET_CONNECTION(cast(NULL as bigint), -2)) AS t
GROUP BY APPLICATION_HANDLE with ur"
**Read ratios :**
db2 "WITH READ_METRICS as (
SELECT APPLICATION_HANDLE,
sum(ROWS_READ) as ROWS_READ,
sum(POOL_DATA_L_READS) as POOL_DATA_L_READS,
sum(POOL_INDEX_L_READS) as POOL_INDEX_L_READS,
sum(POOL_TEMP_DATA_L_READS+POOL_TEMP_INDEX_L_READS) as POOL_TEMP_L_READS
FROM TABLE(MON_GET_CONNECTION(cast(NULL as bigint), -2)) AS m
group by application_handle
)
select r.APPLICATION_HANDLE, r.ROWS_READ,
case when POOL_DATA_L_READS+POOL_TEMP_L_READS > 0 then
decimal(r.ROWS_READ*1.00/(POOL_DATA_L_READS+POOL_TEMP_L_READS), 8,2) end as
ROWS_READ_PER_POOL_L_READ,
POOL_DATA_L_READS, POOL_INDEX_L_READS, POOL_TEMP_L_READS,
varchar(STMT_TEXT,100) as STMT_TEXT
from READ_METRICS r left outer join SYSIBMADM.MON_CURRENT_SQL s
ON r.APPLICATION_HANDLE = s.APPLICATION_HANDLE
order by pool_data_l_reads desc with ur"
===== Bufferpools =====
**BP read efficiency :**
db2 "SELECT SUBSTR(BP_NAME, 1, 15) AS BP_NAME, TOTAL_PHYSICAL_READS, AVERAGE_READ_TIME_MS,TOTAL_ASYNC_READS,TOTAL_SYNC_READS,PERCENT_SYNC_READS,ASYNC_NOT_READ_PERCENT FROM SYSIBMADM.BP_READ_IO with ur"
**Page cleaning rates (castout in pureScale) :**
db2 "select
current timestamp as Time,
case when sum(w.TOTAL_APP_COMMITS) < 100 then null else
cast( float(sum(b.POOL_DATA_WRITES+b.POOL_INDEX_WRITES))
/ sum(w.TOTAL_APP_COMMITS) as decimal(6,1)) end
as \"BP wrt / UOW\",
case when sum(b.POOL_DATA_WRITES+b.POOL_INDEX_WRITES) < 1000 then null else
cast( float(sum(b.POOL_WRITE_TIME))
/ sum(b.POOL_DATA_WRITES+b.POOL_INDEX_WRITES) as decimal(5,1)) end
as \"ms / BP wrt\"
from table(mon_get_workload(null,null)) as w,
table(mon_get_bufferpool(null,null)) as b"
===== TPC-DS Benchmark =====
**Generate SQLs :**
dsqgen -input ../query_templates/templates.lst -directory ../query_templates -dialect db2 -scale 10 -verbose
**Generate DATA :**
dsdgen -SCALE 10 -dir ~/DATA -verbose
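**Load the generated flat files (a sketch : dsdgen writes pipe-delimited .dat files; the path and table are examples, and the trailing '|' at the end of each row may need to be stripped first) :**
db2 "LOAD FROM ${HOME}/DATA/store_sales.dat OF DEL MODIFIED BY COLDEL| REPLACE INTO STORE_SALES NONRECOVERABLE"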
**Disk speed :**
#!/bin/bash
myfile=$1                                           # path of the test file (about 12 GiB will be written)
echo "Creating test file $myfile using direct I/O"
dd if=/dev/zero of=$myfile bs=1024M count=12 oflag=direct
sync;sync;sync;echo 3 > /proc/sys/vm/drop_caches    # flush dirty pages and drop the page cache (requires root)
echo "Single Direct I/O writer"
( dd if=/dev/zero of=$myfile bs=64K count=196608 conv=notrunc oflag=direct > thread1.out 2>&1 ) &
wait
cat thread1.out
echo "Two Direct I/O writer"
( dd if=/dev/zero of=$myfile bs=64K count=98304 conv=notrunc oflag=direct > thread1.out 2>&1 ) &
( dd if=/dev/zero of=$myfile bs=64K count=98304 seek=98304 conv=notrunc oflag=direct > thread2.out 2>&1 ) &
wait
cat thread1.out thread2.out