Use TSQL to extract V1 DBArchive Blobs

Using TSQL it is very easy to automate extraction from Version One’s DBArchive product with its dbremoteblob program (It does require a username and password to access DBArchive and the program comes with DBArchive).
With a 4 core CPU I extracted 72000 documents a day, a limitation found was the programs used did not return status codes and the PCL to PDF program would stall if the PDF already existed.
I wrote a single blob extraction and then expanded to batch process using the date.
This calls external windows executables, so MSSQL needs to be able to run these programs and will require permissions to folder (Which you can customise in code)
Only PCL and Tiff extracts have been written, for others you will need to experiment.
PCL will extract multipage in a single file, but Tiffs will be written page by page with the first file containing BLOB metadata. I had no use for this so simply discard the first page.
I did find out I needed to change everything to PDF, for this I used FolderMill rather than re-extract and process.
As always replace anything with <> with your own system variables.

Credits:
PCL to PDF conversion program WinPCLtoPDF
Tiff Infraview

USE [V1Conversion]
GO
 
/****** Object:  Table [dbo].[BLOB]    Script Date: 15/12/2022 09:59:53 ******/
SET ANSI_NULLS ON
GO
 
SET QUOTED_IDENTIFIER ON
GO
 
CREATE TABLE [dbo].[BLOB](
               [id] [int] IDENTITY(1,1) NOT NULL,
               [BLOB_ID] [int] NOT NULL,
               [Complete] [char](10) NOT NULL,
               [TIFF] [char](10) NOT NULL,
               [Conversion_Status] [char](1) NOT NULL,
               [Archive_Table] [varchar](50) NOT NULL,
CONSTRAINT [PK_BLOB] PRIMARY KEY CLUSTERED
(
               [id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
 
ALTER TABLE [dbo].[BLOB] ADD  CONSTRAINT [DF_BLOB_Complete]  DEFAULT ('N') FOR [Complete]
GO
 
ALTER TABLE [dbo].[BLOB] ADD  CONSTRAINT [DF_BLOB_Conversion_Status]  DEFAULT ('N') FOR [Conversion_Status]
GO
---------------------------------------------
USE [V1Conversion]
GO
 
/****** Object:  StoredProcedure [dbo].[usp_export_BLOB]    Script Date: 15/12/2022 09:59:36 ******/
SET ANSI_NULLS ON
GO
 
SET QUOTED_IDENTIFIER ON
GO
 
-- =============================================
-- Author: Christopher Smith
-- Create date: 17/11/2022
-- Description: Use Conversion tools to extract files from DBArchive
-- =============================================
-- Change Log
-- =============================================
-- Version 1.0 - Initial Version working for single extracts
-- Version 1.0.1 - 21/11/2022 - Stop PCL to PDF process from hanging, starting to add process for processing a DBArchive at a time
-- Version 1.1 - 30/11/2022 - Ability to export whole DBArchive, added ability to track Blobs already extracted
-- =============================================
-- exec usp_export_BLOB '<BLOBID>','SINGLEBLOB'
-- exec usp_export_BLOB '','MULTI','<V1DBArchiveTable>','2017-11-20' --MULTI
-- =============================================
CREATE PROCEDURE [dbo].[usp_export_BLOB]
               @BLOB varchar(50) = '0',@ACTION VARCHAR(10) = '',@V1TABLE VARCHAR(50) = '',@DATEFROM datetime = ''
AS
BEGIN
print 'Setup variables used by multiple actions'
DECLARE @cmdstr VARCHAR(512)
DECLARE @basepath VARCHAR(512)='<V1location>\extract_images\script_extracts\'
DECLARE @EXTRACT_TYPE VARCHAR(512)
DECLARE @UNIXDATEFROM INT = 1669822875
 
IF @ACTION = 'MULTI'
BEGIN
PRINT 'In MULTI'
               If @V1TABLE = '' GOTO EOF
                              Else
                                             BEGIN
If @DATEFROM = ''
               BEGIN
               set @DATEFROM = GETDATE() -90
               END
PRINT @DATEFROM
SET @UNIXDATEFROM = (SELECT DATEDIFF(s, '1970-01-01 00:00:00', @DATEFROM))
PRINT @UNIXDATEFROM
print 'Populate dbo.BLOB'
print @V1TABLE
 
IF EXISTS (SELECT * FROM dbarchive.sys.tables WHERE "name" = @V1TABLE)
BEGIN
--PRINT 'V1 Table Exists'
print 'Declare Variables used by multi job'
               DECLARE @ArchiveTable varchar(255) = '<V1Server>.dbarchive.dbo.'+@V1TABLE
               DECLARE @ExportTSQL nvarchar(255) = ''
               DECLARE @ExportCount int = 0
               DECLARE @ExportNum int = 1
               print @ArchiveTable
               --select top 100 * from dbarchive.dbo.DBA_Purchase_Invoices_Test(nolock)
               DECLARE @V1Query varchar (512) = 'insert into dbo.BLOB (BLOB_ID,Complete,TIFF,Conversion_Status,Archive_Table)SELECT BLOB,''N'','''','''','''+@V1TABLE+''' from '+@ArchiveTable+' where BLOB NOT IN (select BLOB_ID from dbo.BLOB) and ARCH_DATE >= '''+TRY_CAST(@UNIXDATEFROM as nvarchar)+''''
print @V1Query
print 'Insert new records into dbo.BLOB'
               exec (@V1Query)
print 'Count number of records in table in Status N'
               set @ExportTSQL = N'select @ExportCount = COUNT(*) from dbo.BLOB where Complete = ''N'' and Archive_Table = '''+@V1TABLE+N''''
               print @ExportTSQL
               Exec sp_executesql @ExportTSQL, N'@ExportCount INT OUTPUT', @ExportCount OUTPUT
               print @ExportCount
--If nothing to do go to EOF
               If @ExportCount > 0
               BEGIN
 
print 'Create Directory for Conversion (May fail if the folder already exists)'
                              set @cmdstr = 'mkdir '+@basepath+@V1TABLE+'\'
                              EXEC xp_cmdshell @cmdstr
print 'populate temporary table'
create table #workingtable(id int NOT NULL,BLOB_ID int NOT NULL,Complete char(10) NOT NULL,TIFF char(10) NOT NULL,Conversion_Status char(1) NOT NULL,Archive_Table varchar(50) NOT NULL,Rec_num int NOT NULL)
DECLARE @ExportTSQL2 NVARCHAR(max) = N'insert into #workingtable (id,BLOB_ID,Complete,TIFF,Conversion_Status,Archive_Table,Rec_num)(select id,BLOB_ID,Complete,TIFF,Conversion_Status,Archive_Table,ROW_NUMBER() OVER (ORDER BY id) as Rec_num from dbo.BLOB where Complete = ''N'' and Archive_Table = '''+@V1TABLE+N''')'
print @ExportTSQL2
Exec (@ExportTSQL2)
select * from #workingtable
               END
               ELSE GOTO EOF
--If
               WHILE @ExportCount >= @ExportNum
               BEGIN
               print 'In While Loop'
               print @ExportCount
               print @ExportNum
               DECLARE @cmdstrextractmulti as NVARCHAR(512), @BLOBlookupsql as NVARCHAR(512)
               set @BLOBlookupsql = N'select @BLOB = TRY_CAST(BLOB_ID as nvarchar) from #workingtable where Rec_num = '''+TRY_CAST(@ExportNum as nvarchar)+N''''
               print @BLOBlookupsql
               Exec sp_executesql @BLOBlookupsql, N'@BLOB VARCHAR(50) OUTPUT', @BLOB OUTPUT
               print @BLOB
print 'Create Directory for DBArchive Extract'
                              set @cmdstr = 'mkdir '+@basepath+@BLOB+'\'
                              EXEC xp_cmdshell @cmdstr
               set @cmdstrextractmulti = '<V1location>\dbarchive\dbremoteblob -b '+@BLOB+' -c OBTAIN -o 31418 -p <v1password> -s <V1Server>.bsg.local -u <v1user> -z '+@basepath+@BLOB+'\'
                                create table #outputmulti (output varchar(255) null)
                                             insert #outputmulti EXEC xp_cmdshell @cmdstrextractmulti
                                                            set @EXTRACT_TYPE = (SELECT [output] FROM
                                                            (
                                                                           SELECT [output], ROW_NUMBER() OVER (ORDER BY [output]) AS Rownumber
                                                                                          FROM #outputmulti
                                                            ) results
                                                                           WHERE results.Rownumber = 2)
                                                                           print @EXTRACT_TYPE
               DROP TABLE #outputmulti
                                                            IF @EXTRACT_TYPE = 'DL'
                                                                           BEGIN
print 'IN TIFF'
 
                              update dbo.BLOB
                              set TIFF = 'Y',Conversion_Status = 'S'
                              where BLOB_ID = @BLOB
--Delete Page 1 which is a text file and causes i_view64 to return an error in text even though it has created the TIFF.  Program found to always exits with code 0, unable to error trap
DECLARE @cmdstrdeltiffmulti as VARCHAR(512)
               set @cmdstrdeltiffmulti = 'del '+@basepath+@BLOB+'\'+@BLOB+'.1'
                              EXEC xp_cmdshell @cmdstrdeltiffmulti
 
DECLARE @cmdstrtiffmulti as VARCHAR(512)
               set @cmdstrtiffmulti = '<V1location>\extract_images\iview460_x64\i_view64.exe /silent /cmdexit /tifc=4 /multitif=('+@basepath+@V1TABLE+'\'+@BLOB+'.tiff,'+@basepath+@BLOB+'\*.*)'
                              EXEC xp_cmdshell @cmdstrtiffmulti
 
--Clearup and Delete old files before exit
DECLARE @cmdstrdelfoldermulti as VARCHAR(512)
               set @cmdstrdelfoldermulti = 'RD /S /Q '+@basepath+@BLOB+'\'
                              EXEC xp_cmdshell @cmdstrdelfoldermulti
 
--END TIFF
                              END
                              ELSE IF @EXTRACT_TYPE = 'PCL'
                              BEGIN
print 'IN PCL'
 
                              update dbo.BLOB
                              set TIFF = 'N',Conversion_Status = 'S'
                              where BLOB_ID = @BLOB
--IF PDF exists the process will hang, if PDF exists skip
DECLARE @File_Exists2 INT,@BLOB_PDF_pathmulti varchar(255);
set @BLOB_PDF_pathmulti = @basepath+'\'+@V1TABLE+'\'+@BLOB+'.pdf'
--print @BLOB_PDF_pathmulti
EXEC master.dbo.xp_fileexist @BLOB_PDF_pathmulti,@File_Exists2 OUT;
               If @File_Exists2 = '0'
                              BEGIN
               print @File_Exists2
DECLARE @cmdstrpclmulti as VARCHAR(512)
--Program always returns code 0 or hangs
               set @cmdstrpclmulti = '<V1location>\extract_images\images\WinPCLtoPDF.exe '+@basepath+@BLOB+'\'+@BLOB+' '+@basepath+'\'+@V1TABLE+'\'+@BLOB+'.pdf silent'
                              EXEC xp_cmdshell @cmdstrpclmulti
--Clearup and Delete old files before exit
DECLARE @cmdstrdelfolder2multi as VARCHAR(512)
               set @cmdstrdelfolder2multi = 'RD /S /Q '+@basepath+@BLOB+'\'
                              EXEC xp_cmdshell @cmdstrdelfolder2multi
                              END
                              ELSE
                              BEGIN
                              print 'PDF File already exists!'
                              END
                              --End PCL
                              END
 
                              update dbo.BLOB
                              set Complete = 'C',Conversion_Status = 'F'
                              where BLOB_ID = @BLOB
               set @ExportNum = @ExportNum + 1
               END
               drop table #workingtable
END
ELSE
               BEGIN
                              print 'Invalid DBArchive Table Provided'
               END
                                             END
END
IF @ACTION = 'SINGLEBLOB'
BEGIN
PRINT 'In SINGLEBLOB'
--SELECT @BLOB
               If @BLOB = '0' GOTO EOF
                              Else If @BLOB != '0'
                                             BEGIN
print 'Setup Variables for Job'
--DECLARE @resultas VARCHAR(512)
                                                            print 'Create Directory for Extract'
               set @cmdstr = 'mkdir '+@basepath+@BLOB+'\'
                              EXEC xp_cmdshell @cmdstr
 
DECLARE @cmdstrextract as VARCHAR(512)
               set @cmdstrextract = '<V1location>\dbarchive\dbremoteblob -b '+@BLOB+' -c OBTAIN -o 31418 -p <v1password> -s <V1Server>.bsg.local -u <v1user> -z '+@basepath+@BLOB+'\'
                                create table #output (output varchar(255) null)
                                             insert #output EXEC xp_cmdshell @cmdstrextract
                                                            set @EXTRACT_TYPE = (SELECT [output] FROM
                                                            (
                                                                           SELECT [output], ROW_NUMBER() OVER (ORDER BY [output]) AS Rownumber
                                                                                          FROM #output
                                                            ) results
                                                                           WHERE results.Rownumber = 2)
                                                                           print @EXTRACT_TYPE
 
                                                            IF @EXTRACT_TYPE = 'DL'
                                                                           BEGIN
print 'IN TIFF'
--Delete Page 1 which is a text file and causes i_view64 to return an error in text even though it has created the TIFF.  Program found to always exits with code 0, unable to error trap
 
DECLARE @cmdstrdeltiff as VARCHAR(512)
               set @cmdstrdeltiff = 'del '+@basepath+@BLOB+'\'+@BLOB+'.1'
                              EXEC xp_cmdshell @cmdstrdeltiff
 
DECLARE @cmdstrtiff as VARCHAR(512)
               set @cmdstrtiff = '<V1location>\extract_images\iview460_x64\i_view64.exe /silent /cmdexit /tifc=4 /multitif=('+@basepath+@BLOB+'.tiff,'+@basepath+@BLOB+'\*.*)'
                              EXEC xp_cmdshell @cmdstrtiff
 
--Clearup and Delete old files before exit
DECLARE @cmdstrdelfolder as VARCHAR(512)
               set @cmdstrdelfolder = 'RD /S /Q '+@basepath+@BLOB+'\'
                              EXEC xp_cmdshell @cmdstrdelfolder
--END TIFF
                              END
                              ELSE IF @EXTRACT_TYPE = 'PCL'
                              BEGIN
print 'IN PCL'
--IF PDF exists the process will hang, if PDF exists skip
DECLARE @File_Exists INT,@BLOB_PDF_path varchar(255);
set @BLOB_PDF_path = @basepath+@BLOB+'.pdf'
--print @BLOB_PDF_path
EXEC master.dbo.xp_fileexist @BLOB_PDF_path,
@File_Exists OUT;
               If @File_Exists = '0'
                              BEGIN
               print @File_Exists
DECLARE @cmdstrpcl as VARCHAR(512)
--Program always returns code 0 or hangs
               set @cmdstrpcl = '<V1location>\extract_images\images\WinPCLtoPDF.exe '+@basepath+@BLOB+'\'+@BLOB+' '+@basepath+@BLOB+'.pdf silent'
                              EXEC xp_cmdshell @cmdstrpcl
--Clearup and Delete old files before exit
DECLARE @cmdstrdelfolder2 as VARCHAR(512)
               set @cmdstrdelfolder2 = 'RD /S /Q '+@basepath+@BLOB+'\'
                              EXEC xp_cmdshell @cmdstrdelfolder2
                              END
                              ELSE
                              BEGIN
                              print 'PDF File already exists!'
                              END
                              --End PCL
                              END
--END ACTION = SINGLEBLOB
END
END
EOF:
END
GO