Using TSQL it is very easy to automate extraction from Version One’s DBArchive product with its dbremoteblob program (It does require a username and password to access DBArchive and the program comes with DBArchive).
With a 4 core CPU I extracted 72000 documents a day, a limitation found was the programs used did not return status codes and the PCL to PDF program would stall if the PDF already existed.
I wrote a single blob extraction and then expanded to batch process using the date.
This calls external windows executables, so MSSQL needs to be able to run these programs and will require permissions to folder (Which you can customise in code)
Only PCL and Tiff extracts have been written, for others you will need to experiment.
PCL will extract multipage in a single file, but Tiffs will be written page by page with the first file containing BLOB metadata. I had no use for this so simply discard the first page.
I did find out I needed to change everything to PDF, for this I used FolderMill rather than re-extract and process.
As always replace anything with <> with your own system variables.
Credits:
PCL to PDF conversion program WinPCLtoPDF
Tiff Infraview
USE [V1Conversion]
GO
/****** Object: Table [dbo].[BLOB] Script Date: 15/12/2022 09:59:53 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[BLOB](
[id] [int] IDENTITY(1,1) NOT NULL,
[BLOB_ID] [int] NOT NULL,
[Complete] [char](10) NOT NULL,
[TIFF] [char](10) NOT NULL,
[Conversion_Status] [char](1) NOT NULL,
[Archive_Table] [varchar](50) NOT NULL,
CONSTRAINT [PK_BLOB] PRIMARY KEY CLUSTERED
(
[id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[BLOB] ADD CONSTRAINT [DF_BLOB_Complete] DEFAULT ('N') FOR [Complete]
GO
ALTER TABLE [dbo].[BLOB] ADD CONSTRAINT [DF_BLOB_Conversion_Status] DEFAULT ('N') FOR [Conversion_Status]
GO
---------------------------------------------
USE [V1Conversion]
GO
/****** Object: StoredProcedure [dbo].[usp_export_BLOB] Script Date: 15/12/2022 09:59:36 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- =============================================
-- Author: Christopher Smith
-- Create date: 17/11/2022
-- Description: Use Conversion tools to extract files from DBArchive
-- =============================================
-- Change Log
-- =============================================
-- Version 1.0 - Initial Version working for single extracts
-- Version 1.0.1 - 21/11/2022 - Stop PCL to PDF process from hanging, starting to add process for processing a DBArchive at a time
-- Version 1.1 - 30/11/2022 - Ability to export whole DBArchive, added ability to track Blobs already extracted
-- =============================================
-- exec usp_export_BLOB '<BLOBID>','SINGLEBLOB'
-- exec usp_export_BLOB '','MULTI','<V1DBArchiveTable>','2017-11-20' --MULTI
-- =============================================
CREATE PROCEDURE [dbo].[usp_export_BLOB]
@BLOB varchar(50) = '0',@ACTION VARCHAR(10) = '',@V1TABLE VARCHAR(50) = '',@DATEFROM datetime = ''
AS
BEGIN
print 'Setup variables used by multiple actions'
DECLARE @cmdstr VARCHAR(512)
DECLARE @basepath VARCHAR(512)='<V1location>\extract_images\script_extracts\'
DECLARE @EXTRACT_TYPE VARCHAR(512)
DECLARE @UNIXDATEFROM INT = 1669822875
IF @ACTION = 'MULTI'
BEGIN
PRINT 'In MULTI'
If @V1TABLE = '' GOTO EOF
Else
BEGIN
If @DATEFROM = ''
BEGIN
set @DATEFROM = GETDATE() -90
END
PRINT @DATEFROM
SET @UNIXDATEFROM = (SELECT DATEDIFF(s, '1970-01-01 00:00:00', @DATEFROM))
PRINT @UNIXDATEFROM
print 'Populate dbo.BLOB'
print @V1TABLE
IF EXISTS (SELECT * FROM dbarchive.sys.tables WHERE "name" = @V1TABLE)
BEGIN
--PRINT 'V1 Table Exists'
print 'Declare Variables used by multi job'
DECLARE @ArchiveTable varchar(255) = '<V1Server>.dbarchive.dbo.'+@V1TABLE
DECLARE @ExportTSQL nvarchar(255) = ''
DECLARE @ExportCount int = 0
DECLARE @ExportNum int = 1
print @ArchiveTable
--select top 100 * from dbarchive.dbo.DBA_Purchase_Invoices_Test(nolock)
DECLARE @V1Query varchar (512) = 'insert into dbo.BLOB (BLOB_ID,Complete,TIFF,Conversion_Status,Archive_Table)SELECT BLOB,''N'','''','''','''+@V1TABLE+''' from '+@ArchiveTable+' where BLOB NOT IN (select BLOB_ID from dbo.BLOB) and ARCH_DATE >= '''+TRY_CAST(@UNIXDATEFROM as nvarchar)+''''
print @V1Query
print 'Insert new records into dbo.BLOB'
exec (@V1Query)
print 'Count number of records in table in Status N'
set @ExportTSQL = N'select @ExportCount = COUNT(*) from dbo.BLOB where Complete = ''N'' and Archive_Table = '''+@V1TABLE+N''''
print @ExportTSQL
Exec sp_executesql @ExportTSQL, N'@ExportCount INT OUTPUT', @ExportCount OUTPUT
print @ExportCount
--If nothing to do go to EOF
If @ExportCount > 0
BEGIN
print 'Create Directory for Conversion (May fail if the folder already exists)'
set @cmdstr = 'mkdir '+@basepath+@V1TABLE+'\'
EXEC xp_cmdshell @cmdstr
print 'populate temporary table'
create table #workingtable(id int NOT NULL,BLOB_ID int NOT NULL,Complete char(10) NOT NULL,TIFF char(10) NOT NULL,Conversion_Status char(1) NOT NULL,Archive_Table varchar(50) NOT NULL,Rec_num int NOT NULL)
DECLARE @ExportTSQL2 NVARCHAR(max) = N'insert into #workingtable (id,BLOB_ID,Complete,TIFF,Conversion_Status,Archive_Table,Rec_num)(select id,BLOB_ID,Complete,TIFF,Conversion_Status,Archive_Table,ROW_NUMBER() OVER (ORDER BY id) as Rec_num from dbo.BLOB where Complete = ''N'' and Archive_Table = '''+@V1TABLE+N''')'
print @ExportTSQL2
Exec (@ExportTSQL2)
select * from #workingtable
END
ELSE GOTO EOF
--If
WHILE @ExportCount >= @ExportNum
BEGIN
print 'In While Loop'
print @ExportCount
print @ExportNum
DECLARE @cmdstrextractmulti as NVARCHAR(512), @BLOBlookupsql as NVARCHAR(512)
set @BLOBlookupsql = N'select @BLOB = TRY_CAST(BLOB_ID as nvarchar) from #workingtable where Rec_num = '''+TRY_CAST(@ExportNum as nvarchar)+N''''
print @BLOBlookupsql
Exec sp_executesql @BLOBlookupsql, N'@BLOB VARCHAR(50) OUTPUT', @BLOB OUTPUT
print @BLOB
print 'Create Directory for DBArchive Extract'
set @cmdstr = 'mkdir '+@basepath+@BLOB+'\'
EXEC xp_cmdshell @cmdstr
set @cmdstrextractmulti = '<V1location>\dbarchive\dbremoteblob -b '+@BLOB+' -c OBTAIN -o 31418 -p <v1password> -s <V1Server>.bsg.local -u <v1user> -z '+@basepath+@BLOB+'\'
create table #outputmulti (output varchar(255) null)
insert #outputmulti EXEC xp_cmdshell @cmdstrextractmulti
set @EXTRACT_TYPE = (SELECT [output] FROM
(
SELECT [output], ROW_NUMBER() OVER (ORDER BY [output]) AS Rownumber
FROM #outputmulti
) results
WHERE results.Rownumber = 2)
print @EXTRACT_TYPE
DROP TABLE #outputmulti
IF @EXTRACT_TYPE = 'DL'
BEGIN
print 'IN TIFF'
update dbo.BLOB
set TIFF = 'Y',Conversion_Status = 'S'
where BLOB_ID = @BLOB
--Delete Page 1 which is a text file and causes i_view64 to return an error in text even though it has created the TIFF. Program found to always exits with code 0, unable to error trap
DECLARE @cmdstrdeltiffmulti as VARCHAR(512)
set @cmdstrdeltiffmulti = 'del '+@basepath+@BLOB+'\'+@BLOB+'.1'
EXEC xp_cmdshell @cmdstrdeltiffmulti
DECLARE @cmdstrtiffmulti as VARCHAR(512)
set @cmdstrtiffmulti = '<V1location>\extract_images\iview460_x64\i_view64.exe /silent /cmdexit /tifc=4 /multitif=('+@basepath+@V1TABLE+'\'+@BLOB+'.tiff,'+@basepath+@BLOB+'\*.*)'
EXEC xp_cmdshell @cmdstrtiffmulti
--Clearup and Delete old files before exit
DECLARE @cmdstrdelfoldermulti as VARCHAR(512)
set @cmdstrdelfoldermulti = 'RD /S /Q '+@basepath+@BLOB+'\'
EXEC xp_cmdshell @cmdstrdelfoldermulti
--END TIFF
END
ELSE IF @EXTRACT_TYPE = 'PCL'
BEGIN
print 'IN PCL'
update dbo.BLOB
set TIFF = 'N',Conversion_Status = 'S'
where BLOB_ID = @BLOB
--IF PDF exists the process will hang, if PDF exists skip
DECLARE @File_Exists2 INT,@BLOB_PDF_pathmulti varchar(255);
set @BLOB_PDF_pathmulti = @basepath+'\'+@V1TABLE+'\'+@BLOB+'.pdf'
--print @BLOB_PDF_pathmulti
EXEC master.dbo.xp_fileexist @BLOB_PDF_pathmulti,@File_Exists2 OUT;
If @File_Exists2 = '0'
BEGIN
print @File_Exists2
DECLARE @cmdstrpclmulti as VARCHAR(512)
--Program always returns code 0 or hangs
set @cmdstrpclmulti = '<V1location>\extract_images\images\WinPCLtoPDF.exe '+@basepath+@BLOB+'\'+@BLOB+' '+@basepath+'\'+@V1TABLE+'\'+@BLOB+'.pdf silent'
EXEC xp_cmdshell @cmdstrpclmulti
--Clearup and Delete old files before exit
DECLARE @cmdstrdelfolder2multi as VARCHAR(512)
set @cmdstrdelfolder2multi = 'RD /S /Q '+@basepath+@BLOB+'\'
EXEC xp_cmdshell @cmdstrdelfolder2multi
END
ELSE
BEGIN
print 'PDF File already exists!'
END
--End PCL
END
update dbo.BLOB
set Complete = 'C',Conversion_Status = 'F'
where BLOB_ID = @BLOB
set @ExportNum = @ExportNum + 1
END
drop table #workingtable
END
ELSE
BEGIN
print 'Invalid DBArchive Table Provided'
END
END
END
IF @ACTION = 'SINGLEBLOB'
BEGIN
PRINT 'In SINGLEBLOB'
--SELECT @BLOB
If @BLOB = '0' GOTO EOF
Else If @BLOB != '0'
BEGIN
print 'Setup Variables for Job'
--DECLARE @resultas VARCHAR(512)
print 'Create Directory for Extract'
set @cmdstr = 'mkdir '+@basepath+@BLOB+'\'
EXEC xp_cmdshell @cmdstr
DECLARE @cmdstrextract as VARCHAR(512)
set @cmdstrextract = '<V1location>\dbarchive\dbremoteblob -b '+@BLOB+' -c OBTAIN -o 31418 -p <v1password> -s <V1Server>.bsg.local -u <v1user> -z '+@basepath+@BLOB+'\'
create table #output (output varchar(255) null)
insert #output EXEC xp_cmdshell @cmdstrextract
set @EXTRACT_TYPE = (SELECT [output] FROM
(
SELECT [output], ROW_NUMBER() OVER (ORDER BY [output]) AS Rownumber
FROM #output
) results
WHERE results.Rownumber = 2)
print @EXTRACT_TYPE
IF @EXTRACT_TYPE = 'DL'
BEGIN
print 'IN TIFF'
--Delete Page 1 which is a text file and causes i_view64 to return an error in text even though it has created the TIFF. Program found to always exits with code 0, unable to error trap
DECLARE @cmdstrdeltiff as VARCHAR(512)
set @cmdstrdeltiff = 'del '+@basepath+@BLOB+'\'+@BLOB+'.1'
EXEC xp_cmdshell @cmdstrdeltiff
DECLARE @cmdstrtiff as VARCHAR(512)
set @cmdstrtiff = '<V1location>\extract_images\iview460_x64\i_view64.exe /silent /cmdexit /tifc=4 /multitif=('+@basepath+@BLOB+'.tiff,'+@basepath+@BLOB+'\*.*)'
EXEC xp_cmdshell @cmdstrtiff
--Clearup and Delete old files before exit
DECLARE @cmdstrdelfolder as VARCHAR(512)
set @cmdstrdelfolder = 'RD /S /Q '+@basepath+@BLOB+'\'
EXEC xp_cmdshell @cmdstrdelfolder
--END TIFF
END
ELSE IF @EXTRACT_TYPE = 'PCL'
BEGIN
print 'IN PCL'
--IF PDF exists the process will hang, if PDF exists skip
DECLARE @File_Exists INT,@BLOB_PDF_path varchar(255);
set @BLOB_PDF_path = @basepath+@BLOB+'.pdf'
--print @BLOB_PDF_path
EXEC master.dbo.xp_fileexist @BLOB_PDF_path,
@File_Exists OUT;
If @File_Exists = '0'
BEGIN
print @File_Exists
DECLARE @cmdstrpcl as VARCHAR(512)
--Program always returns code 0 or hangs
set @cmdstrpcl = '<V1location>\extract_images\images\WinPCLtoPDF.exe '+@basepath+@BLOB+'\'+@BLOB+' '+@basepath+@BLOB+'.pdf silent'
EXEC xp_cmdshell @cmdstrpcl
--Clearup and Delete old files before exit
DECLARE @cmdstrdelfolder2 as VARCHAR(512)
set @cmdstrdelfolder2 = 'RD /S /Q '+@basepath+@BLOB+'\'
EXEC xp_cmdshell @cmdstrdelfolder2
END
ELSE
BEGIN
print 'PDF File already exists!'
END
--End PCL
END
--END ACTION = SINGLEBLOB
END
END
EOF:
END
GO