[SQL]檢查資料庫中那些表格欄位可能包含身分證字號

[SQL]檢查資料庫中那些表格欄位可能包含身分證字號

紀錄一下實作過程

問題描述

稽核來檢查了,新版個資法上路在測試環境的個資要做遮蔽,想要知道資料庫中那些表格欄位可能包含身分證字號?

解決方式

建立CRL function來檢查資料庫那些欄位可能是身分證字號

建立方式參照自己的資料

[SQL]使用CLR讀取網頁資料

CLR(C#)


// <copyright file="CSSqlFunction.cs" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>
//------------------------------------------------------------------------------
using System;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;

public partial class UserDefinedFunctions
{
    /// <summary>
    /// 檢查是否為身份證字號
    /// </summary>
    /// <returns></returns>
    [Microsoft.SqlServer.Server.SqlFunction]
    public static SqlBoolean CheckId(string ID)
    {        
        if (ID.Trim().Length!=10)
        {
            return  false;
        }
        string NEWID = "";
        int result;
        string restnum = ID.Substring(1, 9);
        if (int.TryParse(restnum, out result) == false)
        {
            return false;
        }
        int[] MULT = new int[] { 1, 9, 8, 7, 6, 5, 4, 3, 2, 1, 1 };
        int SUM = 0;
        bool RESULT = true;
        switch (ID.Substring(0, 1).ToUpper())
        {
            case "A":
                NEWID = "10";
                break;
            case "B":
                NEWID = "11";
                break;
            case "C":
                NEWID = "12";
                break;
            case "D":
                NEWID = "13";
                break;
            case "E":
                NEWID = "14";
                break;
            case "F":
                NEWID = "15";
                break;
            case "G":
                NEWID = "16";
                break;
            case "H":
                NEWID = "17";
                break;
            case "J":
                NEWID = "18";
                break;
            case "K":
                NEWID = "19";
                break;
            case "L":
                NEWID = "20";
                break;
            case "M":
                NEWID = "21";
                break;
            case "N":
                NEWID = "22";
                break;
            case "P":
                NEWID = "23";
                break;
            case "Q":
                NEWID = "24";
                break;
            case "R":
                NEWID = "25";
                break;
            case "S":
                NEWID = "26";
                break;
            case "T":
                NEWID = "27";
                break;
            case "U":
                NEWID = "28";
                break;
            case "V":
                NEWID = "29";
                break;
            case "X":
                NEWID = "30";
                break;
            case "Y":
                NEWID = "31";
                break;
            case "W":
                NEWID = "32";
                break;
            case "Z":
                NEWID = "33";
                break;
            case "I":
                NEWID = "34";
                break;
            case "O":
                NEWID = "35";
                break;
            default:
                return false;
            // break;
        }
        NEWID += ID.Substring(1, 9);
        for (int i = 0; i < NEWID.Length; i++)
        {
            SUM += int.Parse(NEWID.Substring(i, 1)) * MULT[i];
        }
        if (SUM % 10 != 0) RESULT = false;
        return RESULT;
       
    }
    
}

SQL



USE testdb
GO
-- 啟用 CLR 整合
EXEC sp_configure 'clr enabled', 1
GO
RECONFIGURE
GO

-- 檢查是否已經啟用 CLR 整合
EXEC sp_configure 'clr enabled'
GO

-- 查詢 is_trustworthy_on 屬性
SELECT name N'資料庫'
	 , is_trustworthy_on N'是否被標示為可信任'
FROM
	sys.databases
WHERE
	name = 'testdb'
GO

-- Database option TRUSTWORTHY needs to be ON for EXTERNAL_ACCESS
ALTER DATABASE testdb
	SET TRUSTWORTHY ON
	WITH ROLLBACK IMMEDIATE
GO

-- 查詢 is_trustworthy_on 屬性是否開啟
SELECT name N'資料庫'
	 , is_trustworthy_on N'是否被標示為可信任'
FROM
	sys.databases
WHERE
	name = 'testdb'
GO


-- 註冊ASSEMBLY(第二次跑的話要先把與ASSEMBLY相依的物件刪除)
IF  EXISTS
(SELECT *
 FROM
	 sys.objects
 WHERE
	 object_id = object_id(N'[dbo].[uf_CheckID]')
	 AND type IN (N'FN', N'IF', N'TF', N'FS', N'FT')) DROP FUNCTION [dbo].[uf_CheckID]
GO

IF EXISTS
(SELECT name
 FROM
	 sys.assemblies
 WHERE
	 name = 'CheckIDPersonal') DROP ASSEMBLY [CheckIDPersonal]
GO

CREATE ASSEMBLY [CheckIDPersonal] 
FROM 'E:\CLR\CheckIDPersional.dll'
WITH PERMISSION_SET = EXTERNAL_ACCESS;
GO

-- 查詢組件是否建立
SELECT *
FROM
	sys.assemblies;
GO

-- 建立function
IF  EXISTS
(SELECT *
 FROM
	 sys.objects
 WHERE
	 object_id = object_id(N'[dbo].[uf_CheckID]')
	 AND type IN (N'FN', N'IF', N'TF', N'FS', N'FT')) DROP FUNCTION [dbo].[uf_CheckID]
GO
	 
CREATE FUNCTION [dbo].[uf_CheckID]
	(@ID [nvarchar](10))
	RETURNS bit
	WITH EXECUTE AS CALLER
AS 
EXTERNAL NAME [CheckIDPersonal].[UserDefinedFunctions].[CheckId]
GO

-- 測試函式
DECLARE @url nvarchar(10);
SET @url = N'A123456789';
SELECT testdb.dbo.uf_CheckID(@url) AS [身分證字號檢查結果];


-- 應用:找出資料庫所有包含身分證字號的欄位
-- step1. 先判斷資料欄位型態是否符合
-- 型態必須是下列其中一種 nvarchar, nchar, ntext, varchar, char, text
-- 實際長度必須大於等於10
USE testdb
GO

create table #ChoosenColumn 
(	
	tablename sysname,
	columnname sysname,
	sqltext nvarchar(500),
	PersonID bit
);

EXEC sys.sp_MSforeachtable '
insert #ChoosenColumn
SELECT ''?'' as tablename , ColumnName,
'' select distinct ''+ColumnName+'' as colvalue from ''+''?''+'' where ''
+ColumnName+'' is not null '' as sqltext
,0
FROM
	(SELECT columns.name ColumnName
		  , types.name ColumnType
		  , columns.is_identity IsIdentity
		  , columns.is_nullable IsNullable
		  , cast(columns.max_length AS INT) ByteLength
		  , (CASE
				WHEN types.name = ''nvarchar'' AND columns.max_length > 0 THEN
					columns.max_length / 2
				WHEN types.name = ''nchar'' AND columns.max_length > 0 THEN
					columns.max_length / 2
				WHEN types.name = ''ntext'' AND columns.max_length > 0 THEN
					columns.max_length / 2
				ELSE
					columns.max_length
			END) CharLength
		  , cast(columns.scale AS INT) Scale
		  , extended_properties.value Remark
	 FROM
		 sys.columns
		 INNER JOIN sys.types
			 ON columns.system_type_id = types.system_type_id AND columns.user_type_id = types.user_type_id
		 LEFT JOIN sys.extended_properties
			 ON columns.object_id = extended_properties.major_id AND columns.column_id = extended_properties.minor_id
	 WHERE
		 object_id = object_id(''?'')
		 AND types.name IN (''nvarchar'', ''nchar'', ''ntext'', ''varchar'', ''char'', ''text'')) a
WHERE
	a.CharLength >= 10
ORDER BY
	ColumnType
'
-- 顯示過濾後的結果
SELECT row_number() OVER (ORDER BY tablename) AS rno
	 , *
FROM
	#ChoosenColumn; 
	
-- step2. 排序整理欄位資料
CREATE TABLE #tableNameList
(
	rowNum int,
	tablename sysname,
	columnname sysname,
	sqltext nvarchar(500),
	PersonID bit   		
)
GO

INSERT INTO #tableNameList
SELECT row_number() OVER (ORDER BY tablename) AS rno
	 , *
FROM
	#ChoosenColumn
GO

-- 檢查排序後的資料 
SELECT *
FROM
	#tableNameList
	
-- step3. 開始更新資料
-- 根據資料內容判斷是否此欄位為身份證字號
--SELECT count(1)
--FROM
--	(SELECT testdb.dbo.uf_CheckID(colvalue) AS CheckIdResult
--		  , colvalue
--	 FROM
--		 (SELECT DISTINCT ID AS colvalue
--		  FROM
--			  [dbo].[EMPLOYEE] 
--        where ID is not null) c) d
--WHERE
--	CheckIdResult = 1
SET NOCOUNT ON;
DECLARE @count int;
DECLARE @tmp_sqltext nvarchar(500);
DECLARE @update_sqltext nvarchar(1000);
DECLARE @tmp_tablename sysname;
DECLARE @tmp_columnname sysname;
SELECT @count = count(1)
FROM
	#tableNameList;

WHILE @count > 0
BEGIN
SELECT @tmp_sqltext = sqltext
FROM
	#tableNameList
WHERE
	rowNum = @count;

SET @update_sqltext = ' update #tableNameList
 set PersonID = cast((SELECT count(1) FROM
(SELECT testdb.dbo.uf_CheckID(colvalue)as CheckIdResult,colvalue
from 
(' + @tmp_sqltext + ') c)
d WHERE CheckIdResult = 1) as bit) where rowNum=' + cast(@count AS VARCHAR);
EXEC (@update_sqltext);
SELECT @count = @count - 1
END

-- step4. 最後檢查結果:所有包含身分證字號的欄位
SELECT *
FROM
	#tableNameList
WHERE
	PersonID = 1

--驗證資料
--testdb
SELECT DISTINCT ID AS colvalue
FROM
	[dbo].[EMPLOYEE]
SELECT DISTINCT ID AS colvalue
FROM
	[dbo].[EMPLOYEE_BACKUP]
SELECT DISTINCT ID AS colvalue
FROM
	[dbo].[EMPLOYEE_SYNC]
SELECT DISTINCT ID AS colvalue
FROM
	[dbo].[EMPLOYEEJOB]


-- 刪除暫存表
DROP TABLE #ChoosenColumn;
DROP TABLE #tableNameList;


測試結果畫面

pic1

注意事項

資料不一定完全都是身分證資料,有可能前端資料garbage in garbage out,仍需手工確認。