#PowerShell 4.0 Script
#Digital Data
#Ludwig-Rinn-Strasse 16
#35452 Heuchelheim
#Tel.: 0641 / 202360
#E-Mail: info@digitaldata.works
#Version Number 1.0.0.0
#Version Date 20.11.2020
#Requires –Version 4.0

#Purpose:
#  Reads the text of page 1 of every PDF found in $SourcePath (via iTextSharp, inside a
#  background job), inserts file name + text into the SQL table [itextsharp] and moves the
#  file to $ArchivPath on success or to $ErrorPath when the insert fails.
#  Files whose extracted text is 20 characters or shorter are reported and left in place.

#-----------------------------------------------------------------------------------------------------#
######################################## check for arguments ##########################################
#-----------------------------------------------------------------------------------------------------#

#-----------------------------------------------------------------------------------------------------#
################################## add additional built-in assemblies #################################
#-----------------------------------------------------------------------------------------------------#

#-----------------------------------------------------------------------------------------------------#
############################################ set variables ############################################
#-----------------------------------------------------------------------------------------------------#

[string]$PDFExtractorDLL = "E:\itextsharp\itextsharp.dll"
[string]$SourcePath = "E:\itextsharp\in"
[string]$ArchivPath = "E:\itextsharp\out"
[string]$ErrorPath = "E:\itextsharp\error"
[string]$SourceFileExtension = "*.pdf"
#NOTE(review): credentials are hard-coded in clear text; consider integrated security or a
#protected credential store before using this outside a test environment.
[string]$DBSQLConnectServer = "172.24.12.41\tests"
[string]$DBSQLConnectDatabase = "DD_Test"
[string]$DBSQLConnectUser = "sa"
[string]$DBSQLConnectPassword = "dd"
[string]$DBSQLConnectIntegratedSecurity = $false
[string]$DBSQLQueryHead = "INSERT INTO [itextsharp] (FILENAME, FILECONTENT)"
[int]$Counter = 0

#-----------------------------------------------------------------------------------------------------#
########################################### preparing part ############################################
#-----------------------------------------------------------------------------------------------------#

#Clear Console Content
Clear-Host

#get all relevant files
[Array]$Items = Get-ChildItem -Path $SourcePath -Filter $SourceFileExtension

#-----------------------------------------------------------------------------------------------------#
############################################# main part ###############################################
#-----------------------------------------------------------------------------------------------------#

IF ($Items.Count -ge 1) {

    Write-Host "Found" $Items.Count $SourceFileExtension.Replace("*.","")"Files!"

    #Prepare one connection object that is opened/closed around every insert.
    TRY {

        $DBSQLConnection = New-Object System.Data.SqlClient.SqlConnection
        $DBSQLConnection.ConnectionString = "Server = $DBSQLConnectServer; uid=$DBSQLConnectUser; pwd=$DBSQLConnectPassword; Database = $DBSQLConnectDatabase; Integrated Security = $DBSQLConnectIntegratedSecurity"

    } #end try

    CATCH {

        Write-Host "Cannot prepare db connect!"
        Write-Host $DBSQLConnectServer
        Write-Host $DBSQLConnectDatabase
        Write-Host $DBSQLConnectUser
        #Never echo the real password into console output or transcripts.
        Write-Host "********"
        Write-Host $DBSQLConnectIntegratedSecurity
        Write-Host $DBSQLQueryHead
        Write-Error $Error[0]

        #The script terminates here, so wiping all variables is safe at this point.
        Remove-Variable * -ErrorAction SilentlyContinue
        $Error.Clear()
        EXIT

    } #end catch

    FOREACH ($Item in $Items) {

        $Counter++

        Write-Host ""
        Write-Host "================================="
        Write-Host "---------------------------------"
        Write-Host "Item" $Counter "of" $Items.count
        Write-Host "Processing File:" $Item

        #Reset per file so a failed extraction can never reuse the previous file's text.
        $PDFExtract = $NULL
        $PDFExtractJob = $NULL

        TRY {

            #The extraction runs in a separate job (own process), so the iTextSharp assembly
            #is loaded there and not into this session.
            $PDFExtractJob = Start-Job -ScriptBlock {

                $PDFExtractorDLL = $args[0]
                $ItemFullName = $args[1]

                Add-Type -path $PDFExtractorDLL
                $PDFReader = [iTextSharp.text.pdf.parser.PdfTextExtractor]

                #Only page 1 is read.
                $PDFExtract = $PDFReader::GetTextFromPage($ItemFullName,1)
                $PDFExtract = $PDFExtract.Trim()

                #Quote characters and middle dots are stripped; kept so the stored text
                #stays identical to what earlier runs of this script wrote.
                $PDFExtract = $PDFExtract.Replace("'","")
                $PDFExtract = $PDFExtract.Replace('"','')
                $PDFExtract = $PDFExtract.Replace("‘","")
                $PDFExtract = $PDFExtract.Replace("’","")
                $PDFExtract = $PDFExtract.Replace("·","")

                Write-Output $PDFExtract

            } -ArgumentList $PDFExtractorDLL, $($Item.FullName)

            #-ErrorAction Stop turns errors raised inside the job into terminating errors,
            #otherwise the catch block below would never be reached.
            $PDFExtract = Receive-Job -Job $PDFExtractJob -Wait -ErrorAction Stop

        } #end try

        CATCH {

            Write-Host "Cannot read file fulltext!"
            Write-Error $Error[0]
            $Error.Clear()

            #Do NOT call "Remove-Variable *" here: it would also delete the connection,
            #the paths and the counter that the remaining files still need.
            $PDFExtract = $NULL

        } #end catch

        FINALLY {

            #Always dispose of the job, otherwise one job object leaks per processed file.
            IF ($PDFExtractJob) {

                Remove-Job -Job $PDFExtractJob -Force -ErrorAction SilentlyContinue

            } #end if

        } #end finally

        #Texts of 20 characters or less are treated as "no usable fulltext".
        IF ($PDFExtract.Length -gt 20) {

            Write-Host "File has a valid Fulltext!"

            $DBSQLCommand = $NULL

            TRY {

                #Parameterized statement: file name and text are passed as values and can
                #no longer break or alter the SQL statement.
                $DBSQLQuery = $DBSQLQueryHead + " VALUES (@FileName, @FileContent);"

                Write-Host "Executing SQL Query..."

                $DBSQLCommand = New-Object System.Data.SqlClient.SqlCommand
                $DBSQLCommand.Connection = $DBSQLConnection
                $DBSQLCommand.CommandText = $DBSQLQuery

                #No explicit size for the name: an over-long name must still fail on the
                #server (and end up in the error path) instead of being truncated silently.
                $DBSQLCommand.Parameters.Add("@FileName", [System.Data.SqlDbType]::VarChar).Value = [string]$Item.BaseName
                #Size -1 = varchar(max)
                $DBSQLCommand.Parameters.Add("@FileContent", [System.Data.SqlDbType]::VarChar, -1).Value = [string]$PDFExtract

                $DBSQLConnection.Open()
                $DBSQLCommand.ExecuteNonQuery() | Out-Null
                $DBSQLConnection.Close()

                Move-Item -Path $($Item.fullname) -Destination $ArchivPath -Force

                Write-Host "... done!"

            } #end try

            CATCH {

                #Kept for debugging: the statement that was active when the error occurred.
                $LastErrorQuery = $DBSQLQuery

                Move-Item -path $($Item.fullname) -Destination $ErrorPath -Force

                Write-Host "Cannot insert to db!"
                Write-Host "Moving file to error path."
                Write-Error $Error[0]
                $Error.Clear()

            } #end catch

            FINALLY {

                #Close() is a no-op on an already closed connection.
                $DBSQLConnection.Close()

                IF ($DBSQLCommand) {

                    $DBSQLCommand.Dispose()

                } #end if

            } #end finally

            Write-Host "---------------------------------"
            Write-Host "================================="

        } #end if

        ELSE {

            Write-Host "File has no or an invalid Fulltext!"

        } #end else

    } #end foreach

} #end if

ELSE {

    Write-Host "No files to process!"

} #end else

#Final cleanup of all script variables and the error list.
Remove-Variable * -ErrorAction SilentlyContinue
$Error.Clear()

#Sources:
#https://vasol.eu/working-with-pdfs-with-powershell-in-run-net-script-activites/

#Create SQL TB
#SET ANSI_NULLS ON
#GO
#
#SET QUOTED_IDENTIFIER ON
#GO
#
#CREATE TABLE [dbo].[itextsharp](
#	[GUID] [bigint] IDENTITY(1,1) NOT NULL,
#	[FILENAME] [varchar](50) NOT NULL,
#	[FILECONTENT] [varchar](max) NOT NULL,
#	[STATE] [int] NULL,
#	[ADDED_WHEN] [datetime] NULL
#) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
#GO
#
#ALTER TABLE [dbo].[itextsharp] ADD  CONSTRAINT [DF_itextsharp_ADDED_WHEN]  DEFAULT (getdate()) FOR [ADDED_WHEN]
#GO