#PowerShell 4.0 Script

#Digital Data
#Ludwig-Rinn-Strasse 16
#35452 Heuchelheim
#Tel.: 0641 / 202360
#E-Mail: info@digitaldata.works

#Version Number 1.0.0.0
#Version Date 20.11.2020

#Requires –Version 4.0

#-----------------------------------------------------------------------------------------------------#
######################################## check for arguments ##########################################
#-----------------------------------------------------------------------------------------------------#

#-----------------------------------------------------------------------------------------------------#
################################# add additional built-in assemblies ##################################
#-----------------------------------------------------------------------------------------------------#

#-----------------------------------------------------------------------------------------------------#
############################################ set variables ############################################
#-----------------------------------------------------------------------------------------------------#

#Script configuration. All values are read by the preparing and main parts of this script.

#Full path of the iTextSharp assembly that is loaded to extract the PDF text.
[string]$PDFExtractorDLL = "E:\itextsharp\itextsharp.dll"

#Folder that is scanned for input files.
[string]$SourcePath = "E:\itextsharp\in"
#Folder that receives files after a successful database insert.
[string]$ArchivPath = "E:\itextsharp\out"
#Folder that receives files whose database insert failed.
[string]$ErrorPath = "E:\itextsharp\error"

#Filter used when listing $SourcePath.
[string]$SourceFileExtension = "*.pdf"

#SQL Server connection settings.
#NOTE(review): the credentials are hard-coded in clear text and use the 'sa' login;
#consider integrated security or supplying them from outside the script.
[string]$DBSQLConnectServer = "172.24.12.41\tests"
[string]$DBSQLConnectDatabase = "DD_Test"
[string]$DBSQLConnectUser = "sa"
[string]$DBSQLConnectPassword = "dd"
#Typed as a real boolean; it still expands to "False"/"True" inside the connection string.
[bool]$DBSQLConnectIntegratedSecurity = $false
#Head of the INSERT statement; the VALUES clause is appended per file in the main part.
[string]$DBSQLQueryHead = "INSERT INTO [itextsharp] (FILENAME, FILECONTENT)"

#Running number of the file currently being processed.
[int]$Counter = 0

#-----------------------------------------------------------------------------------------------------#
########################################### preparing part ############################################
#-----------------------------------------------------------------------------------------------------#

#Clear console content
Clear-Host

#Collect all files in the source folder that match the configured filter.
# -File  : skip directories whose name happens to match the filter (e.g. a folder "scans.pdf").
# @(...) : always yields an array, so $Items.Count is 0 instead of relying on $null handling
#          when nothing is found.
[Array]$Items = @(Get-ChildItem -Path $SourcePath -Filter $SourceFileExtension -File)

#-----------------------------------------------------------------------------------------------------#
############################################# main part ###############################################
#-----------------------------------------------------------------------------------------------------#

#Main part.
#For every file in $Items:
#  1. extract the text of page 1 with iTextSharp inside a background job,
#  2. if the extracted text is longer than 20 characters, insert file base name and
#     text into the database and move the file to $ArchivPath,
#  3. if the insert fails, move the file to $ErrorPath.
#Files without usable text are reported and left where they are.
#Reads: $Items, $SourceFileExtension, $PDFExtractorDLL, $ArchivPath, $ErrorPath,
#       $DBSQLConnect*, $DBSQLQueryHead. Writes: $Counter.
if ($Items.Count -ge 1) {

    Write-Host ("Found {0} {1} Files!" -f $Items.Count, $SourceFileExtension.Replace("*.", ""))

    try {

        $DBSQLConnection = New-Object System.Data.SqlClient.SqlConnection
        $DBSQLConnection.ConnectionString = "Server = $DBSQLConnectServer; uid=$DBSQLConnectUser; pwd=$DBSQLConnectPassword; Database = $DBSQLConnectDatabase; Integrated Security = $DBSQLConnectIntegratedSecurity"

    } #end try

    catch {

        Write-Host "Cannot prepare db connect!"
        Write-Host $DBSQLConnectServer
        Write-Host $DBSQLConnectDatabase
        Write-Host $DBSQLConnectUser
        #The password is deliberately NOT echoed to the console.
        Write-Host $DBSQLConnectIntegratedSecurity
        Write-Host $DBSQLQueryHead
        Write-Error -ErrorRecord $_
        Remove-Variable * -ErrorAction SilentlyContinue
        $Error.Clear()
        #Non-zero exit code so a calling scheduler can detect the failure.
        exit 1

    } #end catch

    foreach ($Item in $Items) {

        $Counter++

        #Reset per-file state so a failure can never reuse the text of the previous file.
        $PDFExtract = $null
        $PDFExtractJob = $null

        Write-Host ""
        Write-Host "================================="
        Write-Host "---------------------------------"
        Write-Host "Item" $Counter "of" $Items.Count
        Write-Host "Processing File:" $Item

        try {

            #NOTE(review): extraction runs in a background job, presumably so the assembly
            #and any file handle it keeps live in a separate process - confirm.
            $PDFExtractJob = Start-Job -ScriptBlock {

                $PDFExtractorDLL = $args[0]
                $ItemFullName = $args[1]

                Add-Type -Path $PDFExtractorDLL
                $PDFReader = [iTextSharp.text.pdf.parser.PdfTextExtractor]
                $PDFText = $PDFReader::GetTextFromPage($ItemFullName, 1)

                #Strip surrounding whitespace plus quote and middle-dot characters.
                #The insert below is parameterized, so this is no longer needed for SQL
                #safety; it is kept so stored text matches what earlier runs wrote.
                $PDFText = $PDFText.Trim()
                foreach ($Unwanted in "'", '"', "‘", "’", "·") {
                    $PDFText = $PDFText.Replace($Unwanted, "")
                }

                Write-Output $PDFText

            } -ArgumentList $PDFExtractorDLL, $Item.FullName

            #-ErrorAction Stop turns errors raised inside the job into terminating errors,
            #so the catch block below is actually reached.
            $PDFExtract = Receive-Job -Job $PDFExtractJob -Wait -ErrorAction Stop

        } #end try

        catch {

            Write-Host "Cannot read file fulltext!"
            Write-Error -ErrorRecord $_
            $Error.Clear()
            #Discard partial output; the file is then reported as having no valid fulltext.
            $PDFExtract = $null

        } #end catch

        finally {

            #Remove the job before the file is moved; finished jobs are otherwise kept
            #in the session for every processed file.
            if ($PDFExtractJob) {
                Remove-Job -Job $PDFExtractJob -Force -ErrorAction SilentlyContinue
            }

        } #end finally

        if ($PDFExtract.Length -gt 20) {

            Write-Host "File has a valid Fulltext!"

            $DBSQLCommand = $null

            try {

                $DBSQLCommand = New-Object System.Data.SqlClient.SqlCommand
                $DBSQLCommand.Connection = $DBSQLConnection

                #Parameterized insert: file names or text containing quotes can neither
                #break the statement nor inject SQL.
                $DBSQLCommand.CommandText = "$DBSQLQueryHead VALUES (@FileName, @FileContent);"

                #Cast to plain [string] so ADO.NET receives string values rather than
                #PowerShell wrapper objects (job output is deserialized).
                #No size is set on @FileName, so an over-long name still raises an
                #error instead of being truncated silently.
                $DBSQLCommand.Parameters.Add("@FileName", [System.Data.SqlDbType]::VarChar).Value = [string]$Item.BaseName
                $DBSQLCommand.Parameters.Add("@FileContent", [System.Data.SqlDbType]::VarChar, -1).Value = [string]$PDFExtract

                Write-Host "Executing SQL Query..."

                $DBSQLConnection.Open()
                $DBSQLCommand.ExecuteNonQuery() | Out-Null
                $DBSQLConnection.Close()

                #-LiteralPath: file names containing [ or ] must not be read as wildcards.
                Move-Item -LiteralPath $Item.FullName -Destination $ArchivPath -Force
                Write-Host "... done!"

            } #end try

            catch {

                #Keep the original error; the move below could raise another one.
                $InsertError = $_

                Write-Host "Cannot insert to db!"
                Write-Host "Moving file to error path."
                Move-Item -LiteralPath $Item.FullName -Destination $ErrorPath -Force

                Write-Error -ErrorRecord $InsertError
                $Error.Clear()

            } #end catch

            finally {

                #Close() is safe on an already closed connection; this guarantees the
                #connection is closed on every path before the next file is handled.
                $DBSQLConnection.Close()
                if ($DBSQLCommand) {
                    $DBSQLCommand.Dispose()
                }

            } #end finally

            Write-Host "---------------------------------"
            Write-Host "================================="

        } #end if

        else {

            Write-Host "File has no or an invalid Fulltext!"

        } #end else

    } #end foreach

    #Release the connection object once all files are handled.
    $DBSQLConnection.Dispose()

} #end if

else {

    Write-Host "No files to process!"

} #end else
#Final cleanup: remove every variable that can be removed from the current scope
#(failures are suppressed), then empty the automatic $Error list.
#The order matters: suppressed Remove-Variable failures are still recorded in $Error.
Remove-Variable -Name '*' -ErrorAction SilentlyContinue
$Error.Clear()


#Sources:
#https://vasol.eu/working-with-pdfs-with-powershell-in-run-net-script-activites/

#Create SQL table
#SET ANSI_NULLS ON
#GO
#
#SET QUOTED_IDENTIFIER ON
#GO
#
#CREATE TABLE [dbo].[itextsharp](
# [GUID] [bigint] IDENTITY(1,1) NOT NULL,
# [FILENAME] [varchar](50) NOT NULL,
# [FILECONTENT] [varchar](max) NOT NULL,
# [STATE] [int] NULL,
# [ADDED_WHEN] [datetime] NULL
#) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
#GO
#
#ALTER TABLE [dbo].[itextsharp] ADD CONSTRAINT [DF_itextsharp_ADDED_WHEN] DEFAULT (getdate()) FOR [ADDED_WHEN]
#GO