Tuesday, October 13, 2015

Statistic Repository

I have seen posts from many great SQL masters saying that the first thing they do when diagnosing a performance problem is to update the stats or review the stats; I too have seen the power of updated stats, so when an internal customer of mine was having some issues with stats, it became helpful for me to create a Stats Repository to track and monitor changes in statistics.

The idea is, record once a day the stats info on each statistic in a given database, this can be done across servers (Prod, Test, Dev, etc) if you want to compare to see why something runs better on one server and not on another.

The first step is to create a holding table, if you have a DBA database to store maintenance stuff, put it there, but for this post I'll use master:

------------------------------------------------------
-- SETUP STORAGE TABLE TO STORE DAILY STATS DETAILS --
------------------------------------------------------
USE [master]
GO

/****** Object:  Table [dbo].[StatsHistory]  ******/
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

SET ANSI_PADDING ON
GO

CREATE TABLE [dbo].[StatsHistory](
 [ServerName] [sysname] NULL,
 [DatabaseName] [sysname] NULL,
 [TableName] [sysname] NULL,
 [StatName] [sysname] NOT NULL,
 [Updated] [datetime] NULL,
 [TableRows] [int] NULL,
 [RowsSampled] [int] NULL,
 [PercentSampled] [float] NULL,
 [Steps] [int] NULL,
 [Density] [int] NULL,
 [AverageKeyLength] [int] NULL,
 [StringIndex] [varchar](3) NULL,
 [FilterExpression] [varchar](512) NULL,
 [UnfilteredRows] [int] NULL,
 [StatColumns] [varchar](500) NULL,
 [BatchRunID] [int] NULL,
 [CreateDate] [datetime] NULL
) ON [PRIMARY]
The next step is to create a function that will be used to concatenate the columns for the Stat into a list. Because of how sys.stats_columns works, you'll need to put this in every database you want to monitor

-----------------------------------------------------
-- NEEDS TO BE RUN FOR EVERY DB YOU ARE MONITORING --
--     THIS FUNCTION CREATES A LIST OF COLUMNS     --
--       INCLUDED IN THE PASSED IN STATISTIC       --
-----------------------------------------------------

USE []
GO

/****** Object:  UserDefinedFunction [dbo].[StatColumnIDToList]    Script Date: 12/19/2014 10:11:01 ******/
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- =============================================
-- Author:  Eric Zierdt
-- Create date: 12/9/2014
-- Description: Converts the ColumnIDs to a List
-- USE: SELECT dbo.StatColumnIDToList(357576312,2)
-- =============================================
CREATE FUNCTION [dbo].[StatColumnIDToList] 
(
 -- Add the parameters for the function here
 @ObjectID INT
 ,@StatsID INT
)
RETURNS VARCHAR(2000)
AS
BEGIN
 -- Declare the return variable here
 DECLARE @List VARCHAR(2000)

 SELECT @List = COALESCE(@List + ',', '') + Cast(SC.column_id As varchar(5))
 FROM sys.stats_columns SC 
 WHERE SC.object_id = @ObjectID AND SC.stats_id = @StatsID

 -- Return the result of the function
 RETURN @List

END
The next step is to create a job which you will schedule to run daily. This job should execute the following code to generate the stat info and insert it into the table we made in step 1:

-- =================================================
-- Author:  Eric Zierdt
-- Create date: 12/9/2014
-- Description: INSERTS STATS DATA INTO REPOSITORY
-- =================================================
DECLARE @ServerName VARCHAR(120)
  ,@DBName VARCHAR(120)
  ,@TableName VARCHAR(120)
  ,@StatName VARCHAR(120)
  ,@SQL VARCHAR(MAX)
  ,@BatchRunID INT
  ,@ColumnList VARCHAR(2000)
  
SELECT @BatchRunID = MAX(BatchRunID)+1
FROM master.dbo.StatsHistory
  

CREATE TABLE #StatsTable (
 ServerName SYSNAME NULL
 ,DatabaseName SYSNAME NULL
 ,TableName SYSNAME NULL
 ,StatName SYSNAME
 ,Updated DATETIME
 ,TableRows INT
 ,RowsSampled INT
 ,PercentSampled AS CAST(RowsSampled AS FLOAT)/CAST(TableRows AS FLOAT)*100 
 ,Steps INT
 ,Density INT
 ,AverageKeyLength INT
 ,StringIndex VARCHAR(3)
 ,FilterExpression VARCHAR(512)
 ,UnfilteredRows INT
 ,StatColumns VARCHAR(500)
 ,BatchRunID INT
 ,CreateDate DATETIME
)

CREATE NONCLUSTERED INDEX [IDX_StatTable_Name] ON #StatsTable
(
 [TableName] ASC
) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF
  , ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [Primary]

DECLARE ZCursor CURSOR LOCAL FAST_FORWARD FOR
(
    SELECT  @@SERVERNAME AS ServerName, DB_NAME() AS DBName, OBJECT_NAME(object_id) AS TableName 
           ,name AS StatName, dbo.StatColumnIDToList(object_id,stats_id)
    FROM    sys.stats
    WHERE   OBJECT_NAME(object_id) NOT LIKE ''sys%''
            AND OBJECT_NAME(object_id) NOT LIKE ''MS%''
            AND OBJECT_NAME(object_id) NOT LIKE ''queue%'' 
            AND OBJECT_NAME(object_id) NOT LIKE ''filestream%'' 
            --AND OBJECT_NAME(object_id) = ''MSdistribution_agents''
)
OPEN ZCursor 
FETCH NEXT FROM ZCursor INTO @ServerName, @DBName, @TableName,@StatName,@ColumnList
 WHILE @@FETCH_STATUS = 0
 BEGIN
  --PRINT @StatName
  SET @SQL = ''DBCC SHOW_STATISTICS ('' + @TableName+ '',"'' + @StatName +''") WITH STAT_HEADER,NO_INFOMSGS ;''
  --PRINT @SQL
  INSERT INTO #StatsTable
  (
   StatName
   ,Updated
   ,TableRows
   ,RowsSampled
   ,Steps
   ,Density
   ,AverageKeyLength
   ,StringIndex
   ,FilterExpression
   ,UnfilteredRows
  )
  EXEC(@SQL)

  UPDATE #StatsTable
  SET TableName = @TableName
   ,DatabaseName = @DBName
   ,ServerName = @ServerName
   ,BatchRunID = @BatchRunID
   ,StatColumns = @ColumnList
   ,CreateDate = GETDATE()
  WHERE TableName IS NULL

  FETCH NEXT FROM ZCursor INTO @ServerName, @DBName, @TableName,@StatName,@ColumnList
 END

CLOSE ZCursor
DEALLOCATE ZCursor

--SELECT * FROM #StatsTable WHERE TableRows <> RowsSampled ORDER BY TableName

INSERT INTO master.dbo.StatsHistory
(
 ServerName
 ,DatabaseName
 ,TableName
 ,StatName
 ,Updated
 ,TableRows
 ,RowsSampled
 ,PercentSampled
 ,Steps
 ,Density
 ,AverageKeyLength
 ,StringIndex
 ,FilterExpression
 ,UnfilteredRows
 ,StatColumns
 ,BatchRunID
 ,CreateDate 
)
SELECT ServerName
  ,DatabaseName
  ,TableName
  ,StatName
  ,Updated
  ,TableRows
  ,RowsSampled
  ,PercentSampled
  ,Steps
  ,Density
  ,AverageKeyLength
  ,StringIndex
  ,FilterExpression
  ,UnfilteredRows
  ,StatColumns
  ,BatchRunID
  ,CreateDate 
FROM #StatsTable

DROP TABLE #StatsTable
Now you can do some fun things with your repository table, like this:

-- =====================================================
-- Author:  Eric Zierdt
-- Create date: 12/9/2014
-- Description: SAMPLE CODE TO QUERY STATS REPOSITORY
-- =====================================================
DECLARE @BatchRunID INT
SELECT  @BatchRunID = MAX(BatchRunID) FROM master.dbo.StatsHistory

/**  SEE ALL STATS **/
SELECT  *
FROM    master.dbo.StatsHistory
WHERE BatchRunID = @BatchRunID

/**  SEE ALL STATS OLDER THAN 1 MONTH  **/
SELECT  *
FROM    master.dbo.StatsHistory
WHERE BatchRunID = @BatchRunID
  AND Updated < DATEADD(MONTH,-1,GETDATE())

/**  SEE ALL NON-FULLSCAN STATS  **/
SELECT  *
FROM    master.dbo.StatsHistory
WHERE BatchRunID = @BatchRunID
  AND PercentSampled < 100


/**  CREATE UPDATE STAT SCRIPT FOR ALL NON-FULLSCAN STATS  **/
SELECT  TableName, StatName, TableRows, PercentSampled, 'UPDATE STATISTICS dbo.' + TableName + ' ' + StatName + ' WITH FULLSCAN --' + CAST(TableRows AS VARCHAR(10)) AS SQLCode
FROM    master.dbo.StatsHistory
WHERE   BatchRunID = @BatchRunID
  AND PercentSampled < 100