We can perform incremental load using procedures in database and schedule accordingly to load the data,To do this follow the steps
1. Create a table to store the last run information, basically to capture the last load run date
CREATE TABLE LOAD_HISTORY(LOAD_ID NUMBER NOT NULL, TABLE_NAME VARCHAR2(500),LAST_EXTRACT_DATE DATE);
2. Run the Full load and update the LOAD_HISTORY table with the load details
Let say
3. Create a Package in the database with Incremental Load logic
Assume You have the source (SOURCE_TBL ) and target (TARGET_TBL ) table have same structure ,let say Having these columns in both
RECORD_ID, PARTICIPANT_NAME, AGE, MODIFIED_DATE
--Package Spec
create or replace PACKAGE LOAD_DATA
AS
PROCEDURE LOAD_TARGET_TBL;
END LOAD_DATA;
--Package body
create or replace PACKAGE BODY LOAD_DATA
AS
MYCURRENT_DATE DATE;
LAST_MODIFIED_DATE DATE;
PROCEDURE LOAD_TARGET_TBL
AS
BEGIN
SELECT SYSDATE INTO MYCURRENT_DATE FROM DUAL;
SELECT LAST_EXTRACT_DATE INTO LAST_MODIFIED_DATE
FROM LOAD_HISTORY
WHERE TABLE_NAME=’TARGET_TBL’;
MERGE
INTO TARGET_TBL TAR
USING (SELECT * FROM SOURCE_TBL WHERE MODIFIED_DATE > LAST_MODIFIED_DATE ) SRC
ON(TAR.RECORD_ID=SRC.RECORD_ID)
WHEN MATCHED THEN
UPDATE SET
--TAR.RECORD_ID=SRC.RECORD_ID //We cannot update this column since it has been used in ON clause
TAR. PARTICIPANT_NAME=SRC. PARTICIPANT_NAME,
TAR. AGE=SRC. AGE,
TAR. MODIFIED_DATE =SRC. MODIFIED_DATE
WHEN NOT MATCHED THEN
INSERT (TAR.RECORD_ID, TAR. PARTICIPANT_NAME, TAR. AGE, TAR. MODIFIED_DATE)
VALUES(SRC.RECORD_ID, SRC. PARTICIPANT_NAME, SRC. AGE, SRC. MODIFIED_DATE );
COMMIT;
UPDATE LOAD_HISTORY SET LAST_EXTRACT_DATE=MYCURRENT_DATE
WHERE TABLE_NAME=’TARGET_TBL’;
COMMIT;
END LOAD_TARGET_TBL;
3.Create a Job to and Schedule it according to your choice
BEGIN
DBMS_SCHEDULER.create_program (
program_name => 'LOAD_TARGET_TBL,
program_type => 'STORED_PROCEDURE',
program_action => 'LOAD_DATA. LOAD_TARGET_TBL’,
enabled => TRUE;
END;
BEGIN
DBMS_SCHEDULER.create_schedule (
schedule_name => 'SCHED_TARGET_TBL,
start_date=> trunc(sysdate)+12/24 ,
repeat_interval => 'FREQ=DAILY; BYHOUR=12,24;BYMINUTE=1;',
end_date => NULL;
END;
BEGIN
DBMS_SCHEDULER.create_job (
job_name => 'JOB_TARGET_TBL',
program_name => 'LOAD_TARGET_TBL',
schedule_name => 'SCHED_TARGET_TBL',
enabled => TRUE;
END;
Run this script in the database then Incremental load will happens for every 12 hours daily.
DROPing Job,Schedule and Program
EXEC DBMS_SCHEDULER.drop_job('JOB_NAME');
EXEC DBMS_SCHEDULER.drop_SCHEDULE ('SCHEDULE_NAME');
EXEC DBMS_SCHEDULER.drop_program('PROGRAM_NAME');
Restarting Job
EXEC DBMS_SCHEDULER.disable('JOB_NAME');
EXEC DBMS_SCHEDULER.enable('JOB_NAME');
1. Create a table to store the last run information, basically to capture the last load run date
CREATE TABLE LOAD_HISTORY(LOAD_ID NUMBER NOT NULL, TABLE_NAME VARCHAR2(500),LAST_EXTRACT_DATE DATE);
2. Run the Full load and update the LOAD_HISTORY table with the load details
Let say
3. Create a Package in the database with Incremental Load logic
Assume You have the source (SOURCE_TBL ) and target (TARGET_TBL ) table have same structure ,let say Having these columns in both
RECORD_ID, PARTICIPANT_NAME, AGE, MODIFIED_DATE
--Package Spec
create or replace PACKAGE LOAD_DATA
AS
PROCEDURE LOAD_TARGET_TBL;
END LOAD_DATA;
--Package body
create or replace PACKAGE BODY LOAD_DATA
AS
MYCURRENT_DATE DATE;
LAST_MODIFIED_DATE DATE;
PROCEDURE LOAD_TARGET_TBL
AS
BEGIN
SELECT SYSDATE INTO MYCURRENT_DATE FROM DUAL;
SELECT LAST_EXTRACT_DATE INTO LAST_MODIFIED_DATE
FROM LOAD_HISTORY
WHERE TABLE_NAME=’TARGET_TBL’;
MERGE
INTO TARGET_TBL TAR
USING (SELECT * FROM SOURCE_TBL WHERE MODIFIED_DATE > LAST_MODIFIED_DATE ) SRC
ON(TAR.RECORD_ID=SRC.RECORD_ID)
WHEN MATCHED THEN
UPDATE SET
--TAR.RECORD_ID=SRC.RECORD_ID //We cannot update this column since it has been used in ON clause
TAR. PARTICIPANT_NAME=SRC. PARTICIPANT_NAME,
TAR. AGE=SRC. AGE,
TAR. MODIFIED_DATE =SRC. MODIFIED_DATE
WHEN NOT MATCHED THEN
INSERT (TAR.RECORD_ID, TAR. PARTICIPANT_NAME, TAR. AGE, TAR. MODIFIED_DATE)
VALUES(SRC.RECORD_ID, SRC. PARTICIPANT_NAME, SRC. AGE, SRC. MODIFIED_DATE );
COMMIT;
UPDATE LOAD_HISTORY SET LAST_EXTRACT_DATE=MYCURRENT_DATE
WHERE TABLE_NAME=’TARGET_TBL’;
COMMIT;
END LOAD_TARGET_TBL;
3.Create a Job to and Schedule it according to your choice
BEGIN
DBMS_SCHEDULER.create_program (
program_name => 'LOAD_TARGET_TBL,
program_type => 'STORED_PROCEDURE',
program_action => 'LOAD_DATA. LOAD_TARGET_TBL’,
enabled => TRUE;
END;
BEGIN
DBMS_SCHEDULER.create_schedule (
schedule_name => 'SCHED_TARGET_TBL,
start_date=> trunc(sysdate)+12/24 ,
repeat_interval => 'FREQ=DAILY; BYHOUR=12,24;BYMINUTE=1;',
end_date => NULL;
END;
BEGIN
DBMS_SCHEDULER.create_job (
job_name => 'JOB_TARGET_TBL',
program_name => 'LOAD_TARGET_TBL',
schedule_name => 'SCHED_TARGET_TBL',
enabled => TRUE;
END;
Run this script in the database then Incremental load will happens for every 12 hours daily.
DROPing Job,Schedule and Program
EXEC DBMS_SCHEDULER.drop_job('JOB_NAME');
EXEC DBMS_SCHEDULER.drop_SCHEDULE ('SCHEDULE_NAME');
EXEC DBMS_SCHEDULER.drop_program('PROGRAM_NAME');
Restarting Job
EXEC DBMS_SCHEDULER.disable('JOB_NAME');
EXEC DBMS_SCHEDULER.enable('JOB_NAME');
Great job in putting the whole process in simple to understand language. I was totally unaware of this topic now I can confidently respond to question regarding Incremental loads.
ReplyDeleteI am very surprised not many people have rebounded to your posting.
Keep it up.
Thanks , Great post.
ReplyDelete