Thursday, May 10, 2012

Oracle plsql procedure for Wide table Quality Check - QC / Data Quality - DQ - tables with large no. of columns

Oracle PL SQL procedure to automatically check for any Leading / Trailing spaces, all [space] and Invalid characters in all VARCHAR / CHAR fields of a table.

create or replace
PROCEDURE Qc_Table_Data (full_tbl_nm IN VARCHAR2, create_table_flag IN NUMBER default null)

IS
  schema_nm     VARCHAR2(20);
  tbl_nm        VARCHAR2(40);
  db_link       VARCHAR2(20);

--  CURSOR c1 IS SELECT column_name FROM all_tab_columns 
--    WHERE table_name = UPPER(tbl_nm) AND owner=UPPER(schema_nm)
--    AND (data_type='VARCHAR2' OR data_type='VARCHAR' OR data_type='CHAR'); 
      
  TYPE tab_col_table_type IS TABLE OF
    user_tab_columns.column_name%Type
    INDEX BY BINARY_INTEGER;
       
  TYPE refcur IS REF CURSOR;
  
  c2            refcur;
  vchar_fields  tab_col_table_type;
  all_fields  tab_col_table_type;
  field_cnt     NUMBER;
  field_cnt_all NUMBER;
  flag          NUMBER;
  dta           VARCHAR2(100);
  src_cur       refcur;
  sql_stmt      VARCHAR2(300);
  fld_nm        VARCHAR2(50);
  temp_var      VARCHAR2(300);
  err_det       VARCHAR2(50);
  insuf_privs exception;
  pragma exception_init(insuf_privs, -1031);
  name_used exception;
  pragma exception_init(name_used, -00955);
  table_not_exist exception;
  pragma exception_init(table_not_exist, -00942);

BEGIN
  field_cnt:=0;
  field_cnt_all:=0;
  flag:=0;
  
  schema_nm:= SUBSTR(full_tbl_nm, 1, INSTR(full_tbl_nm, '.')-1);
  tbl_nm:= CASE WHEN INSTR(full_tbl_nm, '@') <> 0 THEN
                  SUBSTR(full_tbl_nm,  INSTR(full_tbl_nm, '.')+1,  INSTR(full_tbl_nm, '@') - INSTR(full_tbl_nm, '.') -1)
                ELSE
                  SUBSTR(full_tbl_nm,  INSTR(full_tbl_nm, '.')+1, LENGTH(full_tbl_nm)-INSTR(full_tbl_nm, '.'))
           END;
  db_link:= LTRIM(full_tbl_nm, schema_nm||'.'||tbl_nm);
  
--  DBMS_OUTPUT.PUT_LINE(schema_nm||'.'||tbl_nm);
  
  sql_stmt:='SELECT column_name FROM all_tab_columns'||db_link||' 
    WHERE table_name = UPPER('''||tbl_nm||''') AND owner=UPPER('''||schema_nm||''')
    AND (data_type like ''VARCHAR2%'' OR data_type like ''VARCHAR%'' OR data_type like ''CHAR%'')';
    
  OPEN src_cur FOR sql_stmt;
  LOOP
    FETCH src_cur INTO fld_nm;
      IF src_cur%FOUND THEN
        field_cnt:=field_cnt+1;
        vchar_fields(field_cnt):= fld_nm;
      ELSE
        EXIT;
      END IF;
  END LOOP; 
  CLOSE src_cur; 
  sql_stmt:='SELECT column_name FROM all_tab_columns'||db_link||' 
    WHERE table_name = UPPER('''||tbl_nm||''') AND owner=UPPER('''||schema_nm||''')';
    
  OPEN src_cur FOR sql_stmt;
  LOOP
    FETCH src_cur INTO fld_nm;
      IF src_cur%FOUND THEN
        field_cnt_all:=field_cnt_all+1;
        all_fields(field_cnt_all):= fld_nm;
      ELSE
        EXIT;
      END IF;
  END LOOP; 
  CLOSE src_cur; 
--  DBMS_OUTPUT.PUT_LINE(schema_nm||'.'||tbl_nm);


  IF field_cnt_all = 0 THEN
    raise_application_error(-20001,'Table does not exist. Ensure you provdide schema name.');


  ELSE
  IF create_table_flag = 1 THEN
    EXECUTE IMMEDIATE 'CREATE TABLE QC_TABLE_RESULT(TBL_NAME VARCHAR2(50), COL_NAME VARCHAR2(40), ERR_DETAILS VARCHAR2(50), SAMPLE_DATA VARCHAR2(100), ERR_QRY VARCHAR2(300), RUN_TIME DATE)';
  END IF;
--  DBMS_OUTPUT.PUT_LINE(field_cnt);
  DBMS_OUTPUT.PUT_LINE('-------------------------------'); 
--------- For Invalid Characters
  FOR i IN 1..field_cnt
  LOOP 
    OPEN c2 FOR 'SELECT '||vchar_fields(i)||' FROM '||schema_nm||'.'||tbl_nm||' WHERE REGEXP_LIKE('||vchar_fields(i)||', ''[^A-Za-z0-9_ ]'')' ;
    FETCH c2 INTO dta;
      IF c2%FOUND THEN
        flag:=1;
        temp_var:= 'SELECT '||vchar_fields(i)||', '||tbl_nm||'.* FROM '||schema_nm||'.'||tbl_nm||db_link||' WHERE REGEXP_LIKE('||vchar_fields(i)||', ''[^A-Za-z0-9_ ]'')';
        err_det:='INVALID CHARACTERS';
        DBMS_OUTPUT.PUT_LINE('INVALID CHARACTERS in " '||vchar_fields(i)||' "; Sample data : "'||dta||'" ;  '||temp_var);
        IF create_table_flag >= 1 THEN
          EXECUTE IMMEDIATE 'INSERT INTO QC_TABLE_RESULT VALUES(:0,:1,:2,:3,:4,:5)' USING schema_nm||'.'||tbl_nm,vchar_fields(i),err_det,dta,temp_var,SYSDATE;
        END IF;
      END IF;
    CLOSE c2;
  END LOOP;
  COMMIT;
  IF flag=1 THEN
    DBMS_OUTPUT.PUT_LINE(CHR(10)||'NOTE: To modify filter, add those allowed special characters inside "[]" of regular expression and run query manually.');  
    DBMS_OUTPUT.PUT_LINE('Run "set define off" to turn off prompt and for -ve symbol, put at the end only.');
    DBMS_OUTPUT.PUT_LINE('-------------------------------');
  END IF;

--DBMS_OUTPUT.PUT_LINE('1');
--------- For Leading/Trailing spaces
  FOR i IN 1..field_cnt
  LOOP 
    OPEN c2 FOR 'SELECT '||vchar_fields(i)||' FROM '||schema_nm||'.'||tbl_nm||' WHERE TRIM('||vchar_fields(i)||') <> '||vchar_fields(i) ;
    FETCH c2 INTO dta;
      IF c2%FOUND THEN
        flag:=2;
        temp_var:='SELECT '||vchar_fields(i)||', '||tbl_nm||'.* FROM '||schema_nm||'.'||tbl_nm||db_link||' WHERE TRIM('||vchar_fields(i)||') <> '||vchar_fields(i);
        err_det:='LEADING/TRAILING SPACES';
        DBMS_OUTPUT.PUT_LINE('LEADING/TRAILING SPACES in " '||vchar_fields(i)||' "; Sample data : "'||dta||' ";   '||temp_var);
        IF create_table_flag >= 1 THEN
          EXECUTE IMMEDIATE 'INSERT INTO QC_TABLE_RESULT VALUES(:0,:1,:2,:3,:4,:5)' USING schema_nm||'.'||tbl_nm,vchar_fields(i),err_det,dta,temp_var,SYSDATE;
        END IF;
      END IF;
    CLOSE c2;
  END LOOP;
  COMMIT;
  IF flag=2 THEN
    DBMS_OUTPUT.PUT_LINE('-------------------------------'); 
  END IF; 


--------- For ALL spaces
  FOR i IN 1..field_cnt
  LOOP 
    OPEN c2 FOR 'SELECT '||vchar_fields(i)||' FROM '||schema_nm||'.'||tbl_nm||' WHERE TRIM('||vchar_fields(i)||') IS NULL AND '||vchar_fields(i)||' IS NOT NULL';
    FETCH c2 INTO dta;
      IF c2%FOUND THEN
        flag:=3;
        temp_var:='SELECT '||vchar_fields(i)||', '||tbl_nm||'.* FROM '||schema_nm||'.'||tbl_nm||db_link||' WHERE TRIM('||vchar_fields(i)||') IS NULL AND '||vchar_fields(i)||' IS NOT NULL';
        err_det:='ALL SPACES';
        DBMS_OUTPUT.PUT_LINE('ALL SPACES in " '||vchar_fields(i)||' "; Sample data : "'||dta||' ";   '||temp_var);
        IF create_table_flag >= 1 THEN
          EXECUTE IMMEDIATE 'INSERT INTO QC_TABLE_RESULT VALUES(:0,:1,:2,:3,:4,:5)' USING schema_nm||'.'||tbl_nm,vchar_fields(i),err_det,dta,temp_var,SYSDATE;
        END IF;
      END IF;
    CLOSE c2;
  END LOOP;
  COMMIT;
  IF flag=3 THEN
    DBMS_OUTPUT.PUT_LINE('-------------------------------'); 
  END IF;  
  
----------- For ALL NULLs
  FOR i IN 1..field_cnt_all
  LOOP 
    OPEN c2 FOR 'SELECT 1 FROM '||schema_nm||'.'||tbl_nm||' WHERE '||all_fields(i)||' IS NOT NULL' ;
    FETCH c2 INTO dta;
      IF c2%NOTFOUND THEN
        flag:=4;
        dta:= NULL;
        temp_var:='SELECT '||all_fields(i)||', '||tbl_nm||'.* FROM '||schema_nm||'.'||tbl_nm||db_link||' WHERE '||all_fields(i)||' IS NOT NULL';
        err_det:='ALL NULLS';
        DBMS_OUTPUT.PUT_LINE('ALL NULLS in " '||all_fields(i)||' ";  '||temp_var);
        IF create_table_flag >= 1 THEN
          EXECUTE IMMEDIATE 'INSERT INTO QC_TABLE_RESULT VALUES(:0,:1,:2,:3,:4,:5)' USING schema_nm||'.'||tbl_nm,all_fields(i),err_det, dta,temp_var,SYSDATE;
        END IF;
      END IF;
    CLOSE c2;
  END LOOP;
  COMMIT;
  IF flag=4 THEN
    DBMS_OUTPUT.PUT_LINE('-------------------------------'); 
  END IF; 

----------- For Any NULLs
  FOR i IN 1..field_cnt_all
  LOOP 
    OPEN c2 FOR 'SELECT 1 FROM '||schema_nm||'.'||tbl_nm||' WHERE '||all_fields(i)||' IS NULL AND 1 IN (SELECT 1 FROM '||schema_nm||'.'||tbl_nm||' WHERE '||all_fields(i)||' IS NOT NULL)' ;
    FETCH c2 INTO dta;
      IF c2%FOUND THEN
        flag:=4;
        dta:= NULL;
        temp_var:='SELECT '||all_fields(i)||', '||tbl_nm||'.* FROM '||schema_nm||'.'||tbl_nm||db_link||' WHERE '||all_fields(i)||' IS NULL AND 1 IN (SELECT 1 FROM '||schema_nm||'.'||tbl_nm||' WHERE '||all_fields(i)||' IS NOT NULL)' ;
        err_det:='ANY NULL';
        DBMS_OUTPUT.PUT_LINE('ANY NULL in " '||all_fields(i)||' ";  '||temp_var);
        IF create_table_flag >= 1 THEN
          EXECUTE IMMEDIATE 'INSERT INTO QC_TABLE_RESULT VALUES(:0,:1,:2,:3,:4,:5)' USING schema_nm||'.'||tbl_nm,all_fields(i),err_det, dta,temp_var,SYSDATE;
        END IF;
      END IF;
    CLOSE c2;
  END LOOP;
  COMMIT;
  IF flag=4 THEN
    DBMS_OUTPUT.PUT_LINE('-------------------------------'); 
  ELSIF flag=0 THEN 
    DBMS_OUTPUT.PUT_LINE('No issues...!!!');
  END IF;  
  END IF;


EXCEPTION
WHEN table_not_exist THEN
  raise_application_error(-20001,'Create this table CREATE TABLE QC_TABLE_RESULT(TBL_NAME VARCHAR2(50), COL_NAME VARCHAR2(40), ERR_DETAILS VARCHAR2(50), SAMPLE_DATA VARCHAR2(100), ERR_QRY VARCHAR2(300), RUN_TIME DATE)'|| CHR(13)  ||'Then run the procedure with create_table_flag as 2 ');
WHEN NAME_USED THEN
  raise_application_error(-20001,'Table QC_TABLE_RESULT already existing in your schema'|| CHR(13)  ||' Drop existing table and create this table CREATE TABLE QC_TABLE_RESULT(TBL_NAME VARCHAR2(50), COL_NAME VARCHAR2(40), ERR_DETAILS VARCHAR2(50), SAMPLE_DATA VARCHAR2(100), ERR_QRY VARCHAR2(300), RUN_TIME DATE) and then run the procedure with create_table_flag as 2 ');
WHEN INSUF_PRIVS THEN
  raise_application_error(-20001,'Insufficient privilege to create table through procedure.'|| CHR(13)  ||' First create table CREATE TABLE QC_TABLE_RESULT(TBL_NAME VARCHAR2(50), COL_NAME VARCHAR2(40), ERR_DETAILS VARCHAR2(50), SAMPLE_DATA VARCHAR2(100), ERR_QRY VARCHAR2(300), RUN_TIME DATE) and then run the procedure with create_table_flag as 2 ');
WHEN OTHERS THEN
      raise_application_error(-20001,'An error was encountered - '||SQLCODE||' -ERROR- '||SQLERRM);
END;


Procedure will create QC_TABLE_RESULT.
Results will be automatically archived to table  QC_TABLE_RESULT:

SELECT * FROM QC_TABLE_RESULT;

Or

Note: Since results of above procedure are printed using DBMS_OUTPUT.PUT_LINE(), you have to run through Oracle SQL Developer tool --> Navigate to the above procedure "QC_TABLE_DATA" --> Right Click --> Click Run 


While running, input table name and below are the options for 2nd parameter
Parameter CREATE_TABLE_FLAG is optional and works as follows.
NULL    : Output of the procedure will be printed in Script Output tab or Running tab of SQL Developer
1             : Procedure will create table QC_TABLE_RESULT in your schema and insert findings in to it.

2             : Archiving mode - Assuming table QC_TABLE_RESULT is already created; procedure inserts its findings to it.



Test table
CREATE TABLE "QC_TEST_TABLE" ("EMPID" NUMBER, "NAME" VARCHAR2(50 BYTE), "SALARY" NUMBER, "EMP_TITLE" VARCHAR2(50 BYTE));
INSERT INTO "QC_TEST_TABLE" (EMPID, NAME, EMP_TITLE) VALUES ('1', 'Rajesh', 'Analyst');
INSERT INTO "QC_TEST_TABLE" (EMPID, NAME, EMP_TITLE) VALUES ('2', 'Lohit', 'Manager');
INSERT INTO "QC_TEST_TABLE" (EMPID, NAME, EMP_TITLE) VALUES ('3', 'Manish', 'Supervisor');
INSERT INTO "QC_TEST_TABLE" (EMPID, NAME, EMP_TITLE) VALUES ('4', 'Rah$l', 'Admin');
INSERT INTO "QC_TEST_TABLE" (EMPID, NAME, EMP_TITLE) VALUES ('5', ' Pavan', 'Accountant');
INSERT INTO "QC_TEST_TABLE" (EMPID, NAME, EMP_TITLE) VALUES ('6', 'Prasad', '    ');
INSERT INTO "QC_TEST_TABLE" (EMPID, NAME) VALUES ('7', 'Prasad');
COMMIT;

No comments:

Post a Comment