0

I have been trying to upload around 18 million records from Oracle to SQL Server. The problem is the loading task which take days to complete even though the data is not so much. I have marked parts of the code relative to their respective time taken to completely execute.

declare @v_sql VARCHAR(MAX)
declare @column_name VARCHAR(MAX) 
declare @column_name2 VARCHAR(MAX) =''
declare @column_name3 VARCHAR(MAX) =''
declare @tablename VARCHAR(MAX) ='TITLE_BLOCK_PARTS' --input parameter
declare @v_sql2 VARCHAR(MAX)=''
declare @temp_table VARCHAR(500) = '##'+@tablename+'_T'
declare @temp_table2    VARCHAR(500)
declare @v_entityid int = 14 --input parameter
declare @vcount INT = 1
declare @v_total_records bigint
DECLARE @CountResults TABLE (CountReturned bigint)
declare @v_total_ei bigint
declare @v_max_ei bigint
declare @v_total_no_col int
declare @col_to_exclude VARCHAR(MAX) = ',ID,CLASS,'
declare @stg_table vARCHAR(500)  = '##'+@tablename+'_TEMP'
declare @v_success int = -1
declare @v_tablename VARCHAR(MAX) = @tablename
declare @itemnumber varchar(500) ='';
declare @rev varchar(500) ='';
declare @v_stg_sql  VARCHAR(MAX) = ''
declare @v_temp_table vARCHAR(500)  = '##'+@tablename+'_T_ORI'
declare @v_sql3 VARCHAR(MAX) = ''
declare @v_countrownum bigint = 1
declare @v_rownum bigint 
DECLARE @v_rownumtab TABLE (ROWCountReturned bigint)

BEGIN

--SET @col_to_exclude =  ''''+REPLACE(@col_to_exclude,',',''',''') +'''' 

--Start: Following code takes maximum 5 to 7 minutes to complete
IF EXISTS (SELECT * FROM sys.tables WHERE name = @v_tablename)
Begin 
set @v_sql = ' DROP TABLE ' + @v_tablename
exec (@v_sql)
END

set @v_sql = 'SELECT *
into ' + @tablename +'
  FROM [ORCLTST]..[AGILE].'+@v_tablename;

exec (@v_sql)

--End: Following code takes maximum 5 to 7 minutes to complete

--Start: Following code takes maximum 4 minutes to complete
IF OBJECT_ID('tempdb..'+@stg_table) IS NOT NULL 
exec ('drop table ' + @stg_table)

SET @v_sql = 'SELECT * 
INTO '+@stg_table+
'
FROM ' + @tablename;

EXEC (@v_sql);

set @v_sql = ''

set @v_total_no_col = (SELECT COUNT(COLUMN_NAME)
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND COLUMN_NAME
NOT IN 
(
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND 
charindex(',' + CAST(COLUMN_NAME as nvarchar(20)) + ',', @col_to_exclude) > 0
)
)


DECLARE get_columns CURSOR FOR  
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND COLUMN_NAME
NOT IN 
(
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND 
charindex(',' + CAST(COLUMN_NAME as nvarchar(20)) + ',', @col_to_exclude) > 0
)

--SELECT COLUMN_NAME
--FROM INFORMATION_SCHEMA.COLUMNS
--WHERE TABLE_NAME = @tablename
--AND COLUMN_NAME NOT IN (@col_to_exclude)
--@tablename
--print @sql3

OPEN get_columns  
FETCH NEXT FROM get_columns INTO @column_name

WHILE @@FETCH_STATUS = 0          
 BEGIN  
--PRINT @column_name        

SET @column_name2 = replace(REPLACE(REPLACE(LTRIM(RTRIM(REPLACE(@column_name,'"',''))),')',''),'(',''),' ','')

--replace(@column_name2
--print 'ALTER TABLE ' + @sql5+ ' DROP constraint ' + @sql3
set @v_sql2+='CAST('+'['+@column_name+']'+' AS VARCHAR(8000))'+ 'AS '+ @column_name2 + ',
'
+
'
'


SET @v_sql3 += 'CAST('+'ISNULL(['+@column_name+'],'''')'+' AS VARCHAR(8000))'+ 'AS '+ @column_name2 + ',
'
+
'
'

SET @column_name3 += @column_name2 + ',
'+

'  
'

FETCH NEXT FROM get_columns INTO  @column_name        

      end  
     CLOSE get_columns          
 DEALLOCATE get_columns 
--Print @v_sql2   
set @v_sql2 = left(@v_sql2, len(@v_sql2)-5)

set @v_sql3 = left(@v_sql3, len(@v_sql3)-5)

SET @column_name3 = left(@column_name3, len(@column_name3)-7)

IF OBJECT_ID('tempdb..'+@temp_table) IS NOT NULL 
exec ('drop table ' + @temp_table)

IF OBJECT_ID('tempdb..'+@temp_table+'2') IS NOT NULL 
exec ('drop table ' + @temp_table+'2')

IF OBJECT_ID('tempdb..'+@v_temp_table) IS NOT NULL 
exec ('drop table ' + @v_temp_table)

IF OBJECT_ID('tempdb..'+@v_temp_table+'2') IS NOT NULL 
exec ('drop table ' + @v_temp_table+'2')

SET @v_sql = 'Select
    ROW_NUMBER() 
        OVER (ORDER BY number, REV) AS Row, 
        '
            +
    @v_sql2
    +
    '
    '
    +   


 '   INTO ' + @v_temp_table + ' 
 from 

 '

 +
 @tablename

PRINT @v_sql
EXEC (@V_SQL)



SET @v_sql = 'Select
    ROW_NUMBER() 
        OVER (ORDER BY number, REV) AS Row, 
        '
            +
    @v_sql3
    +
    '
    '
    +   


 '   INTO ' + @v_temp_table+'2' + ' 
 from 

 '

 +
 @v_temp_table

PRINT @v_sql
EXEC (@V_SQL)

SET @v_sql ='
Select  ROW
 ,ColumnName
       , ColumnValue
       INTO ' + @temp_table + ' 
FROM 
(
    Select
    ROW,
    '
    +
    @v_sql3
    +
    '
    '
    +
'

'
+   

 '   from 

 ' 
 +
 @v_temp_table+'2'
 +
 '
 '
 +
 '
 )t  
   UNPIVOT (ColumnValue for ColumnName 
           IN (
           '
           +
           @column_name3
           +
           '

           '
           +

            '   
               )
            )UP
            ';


PRINT @v_sql
--END;
--/*
EXEC (@V_SQL)

set @v_sql = ''

/*set @v_sql = 'with q1 as  (
SELECT t2.Id, t2.displayorder,t2.DisplayName ,T2.[Group],t1.* 
'
 +
 ' 
FROM '+@temp_table+ ' t1
INNER JOIN PLMColumn t2
ON t1.ColumnName = t2.name
where t2.EntityId = ' + cast(@v_entityid as varchar(100))
+
')
,q2 as(
    select      Id, ColumnName, displayorder,[Group], count(*) as c_nnum
    from q1

group by Id,ColumnName,displayorder,[Group])

select t1.id, t1.displayorder, t1.ColumnName, t2.columnvalue
into   '+ @temp_table+'2'+
'
from q2 t1
INNER JOIN '+@temp_table+ ' t2
ON t1.ColumnName = t2.ColumnName
WHERE T1.[Group] <> ''PAGE THREE''

' */

 --

set @temp_table2 = replace(@temp_table+'2', '##','');

IF EXISTS (SELECT * FROM sys.tables WHERE name = @temp_table2)
Begin 
set @v_sql = ' DROP TABLE ' + @temp_table2
exec (@v_sql)
END

set @v_Sql = 'SELECT t1.Row,t2.Id, t2.displayorder,t2.DisplayName ,T2.[Group],t1.ColumnName, t1.ColumnValue
 into   '+ @temp_table2 +
' FROM '+@temp_table+ ' t1
INNER JOIN PLMColumn t2
ON t1.ColumnName = t2.name
where t2.EntityId = ' + cast(@v_entityid as varchar(100)) + '
and T2.[Group] <> ''PAGE THREE'''

print @v_sql
exec(@v_sql)


set @v_sql = ' CREATE INDEX idx_'+@temp_table2+'_row
ON '+ @temp_table2 + ' (row)';

print @v_sql
exec(@v_sql)
--end
--/*
--set @v_sql = 'select COUNT(*) from ' + @tablename
SELECT @v_sql = N'SELECT COUNT(*) FROM' + QUOTENAME(@tablename);

INSERT @CountResults
EXEC(@v_sql)


set @v_total_records = (SELECT CountReturned FROM @CountResults)
print '@v_total_records '+ cast(@v_total_records as varchar(5000)) 

set @v_max_ei = (SELECT IDENT_CURRENT ('PLMENTITYINSTANCE'))
set @v_max_ei = @v_max_ei + 1
--(select ISNULL(MAX (id),0) from PLMEntityInstance)

print '@v_max_ei '+ cast(@v_max_ei as varchar(5000)) 

set @v_total_ei = ISNULL(@v_total_records,0) + @v_max_ei
print '@v_total_ei '+ cast(@v_total_ei as varchar(5000)) 
--END;


IF EXISTS (SELECT * FROM sys.tables WHERE name = 'ITEM_STG')
Begin 
set @v_sql = ' DROP TABLE ITEM_STG'
exec (@v_sql)
END
set @v_sql = 'CREATE TABLE [dbo].[ITEM_STG](
    [INSTANCE_ID] [bigint] NOT NULL,
    [ITEM_NUMBER] [varchar](max) NOT NULL,
    [REV] [varchar](max) NOT NULL)'

exec (@v_sql)

set @v_sql = ''
--END
--/*
WHILE (@vcount <= @v_total_records)
BEGIN

INSERT INTO PLMEntityInstance (EntityId)
VALUES (@v_entityid)


SET @vcount = @vcount + 1
--print 
END 


set @v_sql = ''

set @v_sql = N'with c AS
(select DISTINCT ROW from ' + QUOTENAME(@temp_table2) +')

select count(*) from c

'

INSERT @v_rownumtab
EXEC(@v_sql)


set @v_rownum = (select * from @v_rownumtab)

print @v_rownum

--End: Following code takes maximum 4 minutes to complete

--end
--/*


--Start: Following code takes days to complete
while (@v_max_ei <= @v_total_ei AND @v_countrownum <=@v_rownum)
BEGIN

set @v_sql = ''
--top('+CAST(@v_total_no_col AS varchar(100))+') 
set @v_sql = '

with q2
as(

select 

* from '
+ @temp_table2
 +
 '
 where row = '+ CAST(@v_countrownum AS varchar(5000)) + '

 )
INSERT INTO PLMColumnValue(EntityInstanceId, ColumnID, Value)

select '+ CAST(@v_max_ei AS varchar(5000)) + ' as Entityinstanceid, t.id as column_id, t.columnvalue from q2 t
order by DisplayOrder
'

print @v_countrownum
print @v_sql
exec (@v_sql)


set @itemnumber = (select Value from PLMColumnValue where columnid=(select id from PLMColumn where EntityId = @v_entityid and Name = 'number') and EntityInstanceId =  @v_max_ei)
set @rev = (select Value from PLMColumnValue where columnid=(select id from PLMColumn where EntityId = @v_entityid and Name = 'rev') and EntityInstanceId =  @v_max_ei)

print @itemnumber
print @rev

SET @v_stg_sql = ''
set @v_stg_sql = '
INSERT INTO ITEM_STG (INSTANCE_ID,ITEM_NUMBER,REV)
select ' +  CAST(@v_max_ei AS varchar(5000)) + ' , ' + ''''+@itemnumber+'''' + ' , '  +''''+ @rev+'''' + ''

print @v_stg_sql
exec (@v_stg_sql)


set @itemnumber = '';
set @rev = '';

SET @v_stg_sql = ''
set @v_sql = ''

SET @v_max_ei = @v_max_ei + 1;
set @v_countrownum = @v_countrownum + 1;
END

--End: Following code takes days to complete

END;

The main tables under use are PlmEntityInstance, PlmColumnValue and ITEM_STG. PlmColumnValue table is where all the migrated data is loaded, this table has an EAV model and here the code take ages to load the data. How can I optimize this process? Creating an index on the table where all the data is stored after all the required modifications and before loading (its name is stored in variable @temp_table2) did help a lot but still its not good enough. I initiated loading process yesterday at 2100 local time and its been 15 hours and it has loaded only 149088 records out of 18 million records. Please Help!

If there is not enough information provided in question please let me know.

Please see below wait stats

WaitType    Wait_S  Resource_S  Signal_S    WaitCount   Percentage  AvgWait_S   AvgRes_S    AvgSig_S
CXPACKET    657100.57   636945.69   20154.88    338749067   59.49   0.0019  0.0019  0.0001
PREEMPTIVE_OLEDBOPS 268405.70   268405.70   0.00    24041   24.30   11.1645 11.1645 0.0000
OLEDB   91213.76    91213.76    0.00    34867   8.26    2.6160  2.6160  0.0000
ASYNC_NETWORK_IO    33316.35    32352.43    963.93  4060699 3.02    0.0082  0.0080  0.0002

Sample data is placed on the following link: https://www.dropbox.com/home/Public/Query/

The data in the csv provided in the above link has 8531 records.

You can initialize parameter @temp_table2 with 'TITLE_BLOCK_PARTS'

1 Answers1

1

Replace all WHILE loops with set based INSERT using tally table. For example replace

declare @vcount INT = 1
WHILE (@vcount <= @v_total_records)
BEGIN

    INSERT INTO PLMEntityInstance (EntityId)
    VALUES (@v_entityid)

    SET @vcount = @vcount + 1

END 

with

INSERT INTO PLMEntityInstance (EntityId)
SELECT @v_entityid 
FROM Tally 
WHERE N <= @v_total_records

Tally can be created in multiple ways, for example, 20 million rows tally:

WITH E1(N) AS (
                SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
            ),                          
E2(N) AS (SELECT 1 FROM E1 a, E1 b), 
E4(N) AS (SELECT 1 FROM E2 a, E2 b) --10E+4 or 10,000 rows max
SELECT TOP 20000000
   IDENTITY(INT,1,1) AS N
INTO dbo.Tally
FROM E4 a, E4 b; --100,000,000 rows max

ALTER TABLE dbo.Tally
    ADD CONSTRAINT PK_Tally_N 
        PRIMARY KEY CLUSTERED (N) WITH FILLFACTOR = 100;

EDIT
Concerning refactoring your last WHILE to set based code. I'm not sure i fully understand what the whole script is doing, but it's too long to comment. So first look at original code of the last loop:

@v_countrownum = 1
...
@v_max_ei = ...
while (@v_max_ei <= @v_total_ei AND @v_countrownum <=@v_rownum)
BEGIN
...
SET @v_max_ei = @v_max_ei + 1;
set @v_countrownum = @v_countrownum + 1;
END

Let’s refactor it to iterator query:

...
@v_max_ei = ...
WITH iterator AS (
    SELECT v_max_ei = @v_max_ei + N - 1
        ,v_countrownum = N
    FROM tally
    WHERE @v_max_ei + N - 1 <= @v_total_ei AND N <= @v_rownum
)
SELECT * 
FROM iterator

Next, your loop unconditionally adds rows to PLMColumnValue and ITEM_STG at every iteration, so we must use LEFT JOIN of iterator and source data tables and take care of NULLs. I see your code defaults missing data to ''. Also I need some test data, which are generated this way using subset of PLMColumn fields from your dataset.

-- test data generator
DROP TABLE title_block3;
GO
SELECT row = N, id, DisplayOrder
    ,columnvalue = c.NAME + '_val_0' + cast(N AS VARCHAR(3))
INTO title_block3
FROM PLMColumn c
JOIN tally ON tally.N <= 3
WHERE c.NAME IN ('number','rev' ) OR c.NAME LIKE 'text%'

SELECT * FROM  title_block3;

So finally the script to replace your last WHILE. I use arbitrary variables values instead of computed by the first part of your script. @v_rownum = 3 according to test data.

declare @temp_table2 varchar(100);
declare @v_sql nvarchar(max);
declare @v_max_ei int;
declare @v_entityid int;
declare @v_total_ei int;
declare @v_rownum int;

set @temp_table2 = 'title_block3';  -- see generator

set @v_max_ei = 120000;
set @v_total_ei = 200000;
set @v_rownum = 3;
set @v_entityid = 14;

--Start: Following code takes days to complete

set @v_sql = 
'WITH iterator AS (
    SELECT v_max_ei = '+ CAST(@v_max_ei AS varchar(5000)) + ' + N - 1
        ,v_countrownum = N
    FROM tally
    WHERE '+ CAST(@v_max_ei AS varchar(5000)) + ' + N - 1 <= '+ CAST(@v_total_ei AS varchar(5000)) + 
    ' AND N <= '+ CAST(@v_rownum AS varchar(5000)) + '
)
INSERT INTO PLMColumnValue(EntityInstanceId, ColumnID, Value)
SELECT i.v_max_ei, isnull(t.id,''''), isnull(t.columnvalue,'''') 
FROM iterator i
LEFT JOIN ' + @temp_table2 + ' t ON t.row = i.v_countrownum
ORDER BY i.v_countrownum, t.DisplayOrder ';

print @v_sql;
exec (@v_sql);

WITH iterator AS (
    SELECT v_max_ei = @v_max_ei + N - 1
        ,v_countrownum = N
    FROM tally
    WHERE @v_max_ei + N - 1 <= @v_total_ei AND N <= @v_rownum
)
INSERT INTO ITEM_STG (INSTANCE_ID, ITEM_NUMBER, REV)
SELECT i.v_max_ei
    -- unpivot 
    , isnull(max(CASE c.Name WHEN 'number' THEN Value END),'')
    , isnull(max(CASE c.Name WHEN 'rev' THEN Value END),'')
FROM iterator i
LEFT JOIN PLMColumnValue cv ON cv.EntityInstanceId = i.v_max_ei
LEFT JOIN PLMColumn c ON cv.columnid = c.id 
    AND c.EntityId = @v_entityid 
    AND c.Name IN ( 'number' , 'rev')
GROUP BY i.v_max_ei;

-- check it

SELECT * FROM ITEM_STG;
SELECT * FROM PLMColumnValue;
Serg
  • 14,291
  • 2
  • 19
  • 40
  • Thank you for your valuable input, but I did not face any issues while uploading data into PLMEntityInstance table. The issue occurs when data in loaded in PLMColumnValue table. – Burhan Khalid Butt Jul 21 '16 at 09:07
  • You need to refactor the last loop which takes days to complete the same way. I can't give you exact code without at least some sample data. But this is the way to go, replacing loops with set based SQL gives orders of magnitude perfomance gain in MS SQL. – Serg Jul 21 '16 at 09:25
  • I can upload sample data on a shared location. – Burhan Khalid Butt Jul 21 '16 at 09:36
  • Ok can you share the data needed just for the last loop in a MSSQL consumable format? I see it needs only `@temp_table2` and some variables involved. And the result needed. – Serg Jul 21 '16 at 09:47
  • The link redirects to dropbox login. Can you share direct download link? – Serg Jul 21 '16 at 10:09
  • Please check now. Thanks – Burhan Khalid Butt Jul 21 '16 at 10:13
  • Sorry, it doesn't help. Data without column definitions are useless. I need an SQL script which creates the table, loads data and sets initial variables for the loop in question. – Serg Jul 21 '16 at 10:29
  • creation script is placed at the following link: https://dl.dropboxusercontent.com/u/96203352/Query/creation%20script%20of%20title%20block.sql. I have created an excel sheet which have column names. Its placed at the following link: https://dl.dropboxusercontent.com/u/96203352/Query/title_block_parts.xlsx. For uploading raw data I am using linking between oracle and sql server. Therefore I do not have an uploading script. However, I use sql server import/export feature. And please tell me, for which parameters you need values? – Burhan Khalid Butt Jul 21 '16 at 10:50
  • Are you sure? I see the `q2` cte includes ` from @temp_table2 where row = ` . But there's no `row` column in the '[dbo].[title_block]' . See http://stackoverflow.com/questions/5065357/how-to-generate-an-insert-script-for-an-existing-sql-server-table-that-includes for how to generate INSERT script in SSMS. – Serg Jul 21 '16 at 11:13
  • https://dl.dropboxusercontent.com/u/96203352/Query/Title%20Block%20schema%20and%20data.sql .The script contains schema and insert statements for first 1000 rows. Thank you for being so supportive and patient. – Burhan Khalid Butt Jul 21 '16 at 11:34
  • Sorry, there are no `id` and `columnvalue` columns in `[dbo].[title_block3]` which final loop is referencing. – Serg Jul 21 '16 at 11:50
  • Link for PlmColumnValue sql script https://dl.dropboxusercontent.com/u/96203352/Query/PLMColumnValue.sql . Link for PlmColumn script https://dl.dropboxusercontent.com/u/96203352/Query/PLMCOLUMN.sql . Link for item_stg sql script https://dl.dropboxusercontent.com/u/96203352/Query/Item_stg.sql You need to initialize parameter @v_entityid =14 . Thanks :) – Burhan Khalid Butt Jul 21 '16 at 12:30