Using TSQL it is very easy to automate extraction from Version One’s DBArchive product with its dbremoteblob program (It does require a username and password to access DBArchive and the program comes with DBArchive).
With a 4 core CPU I extracted 72000 documents a day, a limitation found was the programs used did not return status codes and the PCL to PDF program would stall if the PDF already existed.
I wrote a single blob extraction and then expanded to batch process using the date.
This calls external windows executables, so MSSQL needs to be able to run these programs and will require permissions to folder (Which you can customise in code)
Only PCL and Tiff extracts have been written, for others you will need to experiment.
PCL will extract multipage in a single file, but Tiffs will be written page by page with the first file containing BLOB metadata. I had no use for this so simply discard the first page.
I did find out I needed to change everything to PDF, for this I used FolderMill rather than re-extract and process.
As always replace anything with <> with your own system variables.
Credits:
PCL to PDF conversion program WinPCLtoPDF
Tiff Infraview
USE [V1Conversion] GO /****** Object: Table [dbo].[BLOB] Script Date: 15/12/2022 09:59:53 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO CREATE TABLE [dbo].[BLOB]( [id] [int] IDENTITY(1,1) NOT NULL, [BLOB_ID] [int] NOT NULL, [Complete] [char](10) NOT NULL, [TIFF] [char](10) NOT NULL, [Conversion_Status] [char](1) NOT NULL, [Archive_Table] [varchar](50) NOT NULL, CONSTRAINT [PK_BLOB] PRIMARY KEY CLUSTERED ( [id] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] GO ALTER TABLE [dbo].[BLOB] ADD CONSTRAINT [DF_BLOB_Complete] DEFAULT ('N') FOR [Complete] GO ALTER TABLE [dbo].[BLOB] ADD CONSTRAINT [DF_BLOB_Conversion_Status] DEFAULT ('N') FOR [Conversion_Status] GO --------------------------------------------- USE [V1Conversion] GO /****** Object: StoredProcedure [dbo].[usp_export_BLOB] Script Date: 15/12/2022 09:59:36 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO -- ============================================= -- Author: Christopher Smith -- Create date: 17/11/2022 -- Description: Use Conversion tools to extract files from DBArchive -- ============================================= -- Change Log -- ============================================= -- Version 1.0 - Initial Version working for single extracts -- Version 1.0.1 - 21/11/2022 - Stop PCL to PDF process from hanging, starting to add process for processing a DBArchive at a time -- Version 1.1 - 30/11/2022 - Ability to export whole DBArchive, added ability to track Blobs already extracted -- ============================================= -- exec usp_export_BLOB '<BLOBID>','SINGLEBLOB' -- exec usp_export_BLOB '','MULTI','<V1DBArchiveTable>','2017-11-20' --MULTI -- ============================================= CREATE PROCEDURE [dbo].[usp_export_BLOB] @BLOB varchar(50) = '0',@ACTION VARCHAR(10) = '',@V1TABLE VARCHAR(50) = '',@DATEFROM datetime = '' AS BEGIN print 'Setup variables used by multiple actions' DECLARE @cmdstr VARCHAR(512) DECLARE @basepath VARCHAR(512)='<V1location>\extract_images\script_extracts\' DECLARE @EXTRACT_TYPE VARCHAR(512) DECLARE @UNIXDATEFROM INT = 1669822875 IF @ACTION = 'MULTI' BEGIN PRINT 'In MULTI' If @V1TABLE = '' GOTO EOF Else BEGIN If @DATEFROM = '' BEGIN set @DATEFROM = GETDATE() -90 END PRINT @DATEFROM SET @UNIXDATEFROM = (SELECT DATEDIFF(s, '1970-01-01 00:00:00', @DATEFROM)) PRINT @UNIXDATEFROM print 'Populate dbo.BLOB' print @V1TABLE IF EXISTS (SELECT * FROM dbarchive.sys.tables WHERE "name" = @V1TABLE) BEGIN --PRINT 'V1 Table Exists' print 'Declare Variables used by multi job' DECLARE @ArchiveTable varchar(255) = '<V1Server>.dbarchive.dbo.'+@V1TABLE DECLARE @ExportTSQL nvarchar(255) = '' DECLARE @ExportCount int = 0 DECLARE @ExportNum int = 1 print @ArchiveTable --select top 100 * from dbarchive.dbo.DBA_Purchase_Invoices_Test(nolock) DECLARE @V1Query varchar (512) = 'insert into dbo.BLOB (BLOB_ID,Complete,TIFF,Conversion_Status,Archive_Table)SELECT BLOB,''N'','''','''','''+@V1TABLE+''' from '+@ArchiveTable+' where BLOB NOT IN (select BLOB_ID from dbo.BLOB) and ARCH_DATE >= '''+TRY_CAST(@UNIXDATEFROM as nvarchar)+'''' print @V1Query print 'Insert new records into dbo.BLOB' exec (@V1Query) print 'Count number of records in table in Status N' set @ExportTSQL = N'select @ExportCount = COUNT(*) from dbo.BLOB where Complete = ''N'' and Archive_Table = '''+@V1TABLE+N'''' print @ExportTSQL Exec sp_executesql @ExportTSQL, N'@ExportCount INT OUTPUT', @ExportCount OUTPUT print @ExportCount --If nothing to do go to EOF If @ExportCount > 0 BEGIN print 'Create Directory for Conversion (May fail if the folder already exists)' set @cmdstr = 'mkdir '+@basepath+@V1TABLE+'\' EXEC xp_cmdshell @cmdstr print 'populate temporary table' create table #workingtable(id int NOT NULL,BLOB_ID int NOT NULL,Complete char(10) NOT NULL,TIFF char(10) NOT NULL,Conversion_Status char(1) NOT NULL,Archive_Table varchar(50) NOT NULL,Rec_num int NOT NULL) DECLARE @ExportTSQL2 NVARCHAR(max) = N'insert into #workingtable (id,BLOB_ID,Complete,TIFF,Conversion_Status,Archive_Table,Rec_num)(select id,BLOB_ID,Complete,TIFF,Conversion_Status,Archive_Table,ROW_NUMBER() OVER (ORDER BY id) as Rec_num from dbo.BLOB where Complete = ''N'' and Archive_Table = '''+@V1TABLE+N''')' print @ExportTSQL2 Exec (@ExportTSQL2) select * from #workingtable END ELSE GOTO EOF --If WHILE @ExportCount >= @ExportNum BEGIN print 'In While Loop' print @ExportCount print @ExportNum DECLARE @cmdstrextractmulti as NVARCHAR(512), @BLOBlookupsql as NVARCHAR(512) set @BLOBlookupsql = N'select @BLOB = TRY_CAST(BLOB_ID as nvarchar) from #workingtable where Rec_num = '''+TRY_CAST(@ExportNum as nvarchar)+N'''' print @BLOBlookupsql Exec sp_executesql @BLOBlookupsql, N'@BLOB VARCHAR(50) OUTPUT', @BLOB OUTPUT print @BLOB print 'Create Directory for DBArchive Extract' set @cmdstr = 'mkdir '+@basepath+@BLOB+'\' EXEC xp_cmdshell @cmdstr set @cmdstrextractmulti = '<V1location>\dbarchive\dbremoteblob -b '+@BLOB+' -c OBTAIN -o 31418 -p <v1password> -s <V1Server>.bsg.local -u <v1user> -z '+@basepath+@BLOB+'\' create table #outputmulti (output varchar(255) null) insert #outputmulti EXEC xp_cmdshell @cmdstrextractmulti set @EXTRACT_TYPE = (SELECT [output] FROM ( SELECT [output], ROW_NUMBER() OVER (ORDER BY [output]) AS Rownumber FROM #outputmulti ) results WHERE results.Rownumber = 2) print @EXTRACT_TYPE DROP TABLE #outputmulti IF @EXTRACT_TYPE = 'DL' BEGIN print 'IN TIFF' update dbo.BLOB set TIFF = 'Y',Conversion_Status = 'S' where BLOB_ID = @BLOB --Delete Page 1 which is a text file and causes i_view64 to return an error in text even though it has created the TIFF. Program found to always exits with code 0, unable to error trap DECLARE @cmdstrdeltiffmulti as VARCHAR(512) set @cmdstrdeltiffmulti = 'del '+@basepath+@BLOB+'\'+@BLOB+'.1' EXEC xp_cmdshell @cmdstrdeltiffmulti DECLARE @cmdstrtiffmulti as VARCHAR(512) set @cmdstrtiffmulti = '<V1location>\extract_images\iview460_x64\i_view64.exe /silent /cmdexit /tifc=4 /multitif=('+@basepath+@V1TABLE+'\'+@BLOB+'.tiff,'+@basepath+@BLOB+'\*.*)' EXEC xp_cmdshell @cmdstrtiffmulti --Clearup and Delete old files before exit DECLARE @cmdstrdelfoldermulti as VARCHAR(512) set @cmdstrdelfoldermulti = 'RD /S /Q '+@basepath+@BLOB+'\' EXEC xp_cmdshell @cmdstrdelfoldermulti --END TIFF END ELSE IF @EXTRACT_TYPE = 'PCL' BEGIN print 'IN PCL' update dbo.BLOB set TIFF = 'N',Conversion_Status = 'S' where BLOB_ID = @BLOB --IF PDF exists the process will hang, if PDF exists skip DECLARE @File_Exists2 INT,@BLOB_PDF_pathmulti varchar(255); set @BLOB_PDF_pathmulti = @basepath+'\'+@V1TABLE+'\'+@BLOB+'.pdf' --print @BLOB_PDF_pathmulti EXEC master.dbo.xp_fileexist @BLOB_PDF_pathmulti,@File_Exists2 OUT; If @File_Exists2 = '0' BEGIN print @File_Exists2 DECLARE @cmdstrpclmulti as VARCHAR(512) --Program always returns code 0 or hangs set @cmdstrpclmulti = '<V1location>\extract_images\images\WinPCLtoPDF.exe '+@basepath+@BLOB+'\'+@BLOB+' '+@basepath+'\'+@V1TABLE+'\'+@BLOB+'.pdf silent' EXEC xp_cmdshell @cmdstrpclmulti --Clearup and Delete old files before exit DECLARE @cmdstrdelfolder2multi as VARCHAR(512) set @cmdstrdelfolder2multi = 'RD /S /Q '+@basepath+@BLOB+'\' EXEC xp_cmdshell @cmdstrdelfolder2multi END ELSE BEGIN print 'PDF File already exists!' END --End PCL END update dbo.BLOB set Complete = 'C',Conversion_Status = 'F' where BLOB_ID = @BLOB set @ExportNum = @ExportNum + 1 END drop table #workingtable END ELSE BEGIN print 'Invalid DBArchive Table Provided' END END END IF @ACTION = 'SINGLEBLOB' BEGIN PRINT 'In SINGLEBLOB' --SELECT @BLOB If @BLOB = '0' GOTO EOF Else If @BLOB != '0' BEGIN print 'Setup Variables for Job' --DECLARE @resultas VARCHAR(512) print 'Create Directory for Extract' set @cmdstr = 'mkdir '+@basepath+@BLOB+'\' EXEC xp_cmdshell @cmdstr DECLARE @cmdstrextract as VARCHAR(512) set @cmdstrextract = '<V1location>\dbarchive\dbremoteblob -b '+@BLOB+' -c OBTAIN -o 31418 -p <v1password> -s <V1Server>.bsg.local -u <v1user> -z '+@basepath+@BLOB+'\' create table #output (output varchar(255) null) insert #output EXEC xp_cmdshell @cmdstrextract set @EXTRACT_TYPE = (SELECT [output] FROM ( SELECT [output], ROW_NUMBER() OVER (ORDER BY [output]) AS Rownumber FROM #output ) results WHERE results.Rownumber = 2) print @EXTRACT_TYPE IF @EXTRACT_TYPE = 'DL' BEGIN print 'IN TIFF' --Delete Page 1 which is a text file and causes i_view64 to return an error in text even though it has created the TIFF. Program found to always exits with code 0, unable to error trap DECLARE @cmdstrdeltiff as VARCHAR(512) set @cmdstrdeltiff = 'del '+@basepath+@BLOB+'\'+@BLOB+'.1' EXEC xp_cmdshell @cmdstrdeltiff DECLARE @cmdstrtiff as VARCHAR(512) set @cmdstrtiff = '<V1location>\extract_images\iview460_x64\i_view64.exe /silent /cmdexit /tifc=4 /multitif=('+@basepath+@BLOB+'.tiff,'+@basepath+@BLOB+'\*.*)' EXEC xp_cmdshell @cmdstrtiff --Clearup and Delete old files before exit DECLARE @cmdstrdelfolder as VARCHAR(512) set @cmdstrdelfolder = 'RD /S /Q '+@basepath+@BLOB+'\' EXEC xp_cmdshell @cmdstrdelfolder --END TIFF END ELSE IF @EXTRACT_TYPE = 'PCL' BEGIN print 'IN PCL' --IF PDF exists the process will hang, if PDF exists skip DECLARE @File_Exists INT,@BLOB_PDF_path varchar(255); set @BLOB_PDF_path = @basepath+@BLOB+'.pdf' --print @BLOB_PDF_path EXEC master.dbo.xp_fileexist @BLOB_PDF_path, @File_Exists OUT; If @File_Exists = '0' BEGIN print @File_Exists DECLARE @cmdstrpcl as VARCHAR(512) --Program always returns code 0 or hangs set @cmdstrpcl = '<V1location>\extract_images\images\WinPCLtoPDF.exe '+@basepath+@BLOB+'\'+@BLOB+' '+@basepath+@BLOB+'.pdf silent' EXEC xp_cmdshell @cmdstrpcl --Clearup and Delete old files before exit DECLARE @cmdstrdelfolder2 as VARCHAR(512) set @cmdstrdelfolder2 = 'RD /S /Q '+@basepath+@BLOB+'\' EXEC xp_cmdshell @cmdstrdelfolder2 END ELSE BEGIN print 'PDF File already exists!' END --End PCL END --END ACTION = SINGLEBLOB END END EOF: END GO