FilterReader: Add new project
This commit is contained in:
169
FilterReader/FilterReader.cs
Normal file
169
FilterReader/FilterReader.cs
Normal file
@@ -0,0 +1,169 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using System.IO;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace DigitalData.Modules.FilterReader
|
||||
{
|
||||
/// <summary>
/// Implements a <see cref="TextReader"/> that reads the text exposed by an IFilter.
/// </summary>
/// <remarks>
/// The reader owns the IFilter COM object obtained in the constructor and releases it
/// when the reader is closed or disposed.
/// <see cref="Read(char[], int, int)"/> and <see cref="ReadToEnd"/> both advance the same
/// underlying filter; <see cref="ReadToEnd"/> does not look at characters buffered by a
/// previous <see cref="Read(char[], int, int)"/> call.
/// </remarks>
public class FilterReader : TextReader
{
    /// <summary>
    /// Minimum number of characters requested from the filter per GetText call (read-ahead).
    /// </summary>
    private const int ReadAheadBufferSize = 8 * 1024;

    // The wrapped filter; null once the reader has been disposed.
    private IFilter _filter;

    // Set once GetChunk has reported the end of the chunk stream during a Read call.
    private bool _done;

    // The chunk most recently returned by GetChunk.
    private STAT_CHUNK _currentChunk;

    // True while _currentChunk is a text chunk that may still have text to deliver.
    private bool _currentChunkValid;

    // Characters fetched from the filter by Read but not yet handed to the caller.
    private char[] _charsLeftFromLastRead;

    /// <summary>
    /// Creates a reader over the filter that <c>FilterLoader.LoadAndInitIFilter</c>
    /// returns for <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileName">Name of the file to load a filter for.</param>
    /// <exception cref="ArgumentNullException"><paramref name="fileName"/> is null.</exception>
    /// <exception cref="ArgumentException">No filter could be loaded for the file.</exception>
    public FilterReader(string fileName)
    {
        if (fileName == null)
        {
            throw new ArgumentNullException("fileName");
        }

        _filter = FilterLoader.LoadAndInitIFilter(fileName);
        if (_filter == null)
        {
            throw new ArgumentException("no filter defined for " + fileName);
        }
    }

    /// <summary>
    /// Finalizer; routes to <see cref="Dispose(bool)"/> with <c>disposing == false</c>.
    /// </summary>
    ~FilterReader()
    {
        Dispose(false);
    }

    /// <summary>
    /// Closes the reader and releases the underlying filter.
    /// </summary>
    public override void Close()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases the filter COM object. Safe to call more than once.
    /// </summary>
    /// <param name="disposing">
    /// True when called from <see cref="Close"/> / Dispose, false when called from the finalizer.
    /// </param>
    protected override void Dispose(bool disposing)
    {
        // Detach the field first so a second Close()/Dispose() cannot release the object twice.
        IFilter filter = _filter;
        _filter = null;
        _charsLeftFromLastRead = null;
        _currentChunkValid = false;

        // The runtime callable wrapper is itself a managed object, so it is only touched
        // on the explicit dispose path; on the finalizer path the runtime cleans it up.
        if (disposing && filter != null)
        {
            Marshal.ReleaseComObject(filter);
        }

        base.Dispose(disposing);
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> characters of filter text into
    /// <paramref name="array"/>, starting at <paramref name="offset"/>.
    /// </summary>
    /// <param name="array">Destination buffer.</param>
    /// <param name="offset">Index in <paramref name="array"/> at which to start writing.</param>
    /// <param name="count">Maximum number of characters to read.</param>
    /// <returns>
    /// The number of characters written; less than <paramref name="count"/> (possibly 0)
    /// once the filter reports the end of its chunks.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="offset"/> or <paramref name="count"/> is negative.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="array"/> is too small for <paramref name="offset"/> plus <paramref name="count"/>.
    /// </exception>
    /// <exception cref="ObjectDisposedException">The reader has been closed.</exception>
    public override int Read(char[] array, int offset, int count)
    {
        if (array == null)
        {
            throw new ArgumentNullException("array");
        }
        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException("offset");
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count");
        }
        if (array.Length - offset < count)
        {
            throw new ArgumentException("offset and count exceed the length of array.");
        }
        EnsureNotDisposed();

        int endOfChunksCount = 0;
        int charsRead = 0;

        while (!_done && charsRead < count)
        {
            // Serve characters buffered by an earlier read-ahead before asking the filter again.
            if (_charsLeftFromLastRead != null)
            {
                charsRead += DrainLeftover(array, offset + charsRead, count - charsRead);
                continue;
            }

            if (!_currentChunkValid)
            {
                // NOTE(review): any result other than S_OK / FILTER_E_END_OF_CHUNKS is skipped
                // and GetChunk is called again; a filter that keeps returning the same error
                // would keep this loop running - confirm which codes should be terminal.
                IFilterReturnCode chunkResult = _filter.GetChunk(out _currentChunk);
                _currentChunkValid = CurrentChunkIsText(chunkResult);

                if (chunkResult == IFilterReturnCode.FILTER_E_END_OF_CHUNKS)
                {
                    endOfChunksCount++;
                }

                // The end is only accepted once it has been reported twice within this call.
                if (endOfChunksCount > 1)
                {
                    _done = true; // That's it, no more chunks available.
                }
            }

            if (_currentChunkValid)
            {
                int remaining = count - charsRead;

                // Ask for at least ReadAheadBufferSize characters; the surplus is kept for the next call.
                uint bufLength = (uint)Math.Max(remaining, ReadAheadBufferSize);
                char[] buffer = new char[bufLength];

                IFilterReturnCode textResult = _filter.GetText(ref bufLength, buffer);
                if (textResult == IFilterReturnCode.S_OK || textResult == IFilterReturnCode.FILTER_S_LAST_TEXT)
                {
                    // Never trust a reported length beyond what the buffer can hold.
                    int received = (int)Math.Min(bufLength, (uint)buffer.Length);
                    int forCaller = Math.Min(received, remaining);
                    int surplus = received - forCaller;

                    Array.Copy(buffer, 0, array, offset + charsRead, forCaller);
                    charsRead += forCaller;

                    if (surplus > 0)
                    {
                        _charsLeftFromLastRead = new char[surplus];
                        Array.Copy(buffer, forCaller, _charsLeftFromLastRead, 0, surplus);
                    }
                    else
                    {
                        _charsLeftFromLastRead = null;
                    }
                }

                // Anything other than S_OK means this chunk has no further text. Treating
                // unexpected codes the same way prevents re-issuing the same failing
                // GetText call forever.
                if (textResult != IFilterReturnCode.S_OK)
                {
                    _currentChunkValid = false;
                }
            }
        }

        return charsRead;
    }

    /// <summary>
    /// Reads every remaining text chunk from the filter and returns them joined by CR/LF.
    /// </summary>
    /// <remarks>
    /// Within a chunk, the pieces returned by successive GetText calls are separated by a
    /// single space; NUL characters are removed, tabs are replaced by spaces and the chunk
    /// is trimmed. Chunks that end up empty are omitted.
    /// </remarks>
    /// <returns>The concatenated text of all text chunks.</returns>
    /// <exception cref="ObjectDisposedException">The reader has been closed.</exception>
    public override string ReadToEnd()
    {
        EnsureNotDisposed();

        List<string> chunks = new List<string>();
        char[] buffer = new char[ReadAheadBufferSize];

        // NOTE(review): the loop ends only on FILTER_E_END_OF_CHUNKS; a filter that keeps
        // returning some other error code would never terminate it - confirm against the
        // return codes the loaded filters can produce.
        IFilterReturnCode chunkResult = _filter.GetChunk(out _currentChunk);
        while (chunkResult != IFilterReturnCode.FILTER_E_END_OF_CHUNKS)
        {
            // Process only text type chunks.
            if (CurrentChunkIsText(chunkResult))
            {
                string chunkText = ReadCurrentChunkText(buffer);
                if (!string.IsNullOrEmpty(chunkText))
                {
                    chunks.Add(chunkText);
                }
            }

            chunkResult = _filter.GetChunk(out _currentChunk);
        }

        return string.Join("\r\n", chunks);
    }

    /// <summary>
    /// Reads all text of the current chunk and returns it normalized and trimmed.
    /// </summary>
    /// <param name="buffer">Scratch buffer handed to GetText; its length is the request size.</param>
    private string ReadCurrentChunkText(char[] buffer)
    {
        StringBuilder text = new StringBuilder();
        IFilterReturnCode textResult;

        do
        {
            // The length passed in must always describe the buffer actually supplied.
            uint bufLength = (uint)buffer.Length;
            textResult = _filter.GetText(ref bufLength, buffer);

            if (textResult == IFilterReturnCode.S_OK || textResult == IFilterReturnCode.FILTER_S_LAST_TEXT)
            {
                int received = (int)Math.Min(bufLength, (uint)buffer.Length);
                text.Append(buffer, 0, received);
                text.Append(' ');
            }
        }
        while (textResult == IFilterReturnCode.S_OK); // Every other result ends this chunk.

        return text.ToString().Replace("\0", "").Replace("\t", " ").Trim();
    }

    /// <summary>
    /// Copies buffered characters to the caller and keeps whatever does not fit.
    /// </summary>
    /// <param name="array">Destination buffer.</param>
    /// <param name="index">Index in <paramref name="array"/> at which to start writing.</param>
    /// <param name="capacity">Maximum number of characters to copy.</param>
    /// <returns>The number of characters copied.</returns>
    private int DrainLeftover(char[] array, int index, int capacity)
    {
        char[] pending = _charsLeftFromLastRead;
        int charsToCopy = Math.Min(pending.Length, capacity);
        Array.Copy(pending, 0, array, index, charsToCopy);

        if (charsToCopy < pending.Length)
        {
            char[] rest = new char[pending.Length - charsToCopy];
            Array.Copy(pending, charsToCopy, rest, 0, rest.Length);
            _charsLeftFromLastRead = rest;
        }
        else
        {
            _charsLeftFromLastRead = null;
        }

        return charsToCopy;
    }

    /// <summary>
    /// Tells whether the GetChunk call succeeded and <c>_currentChunk</c> is flagged as text.
    /// </summary>
    private bool CurrentChunkIsText(IFilterReturnCode chunkResult)
    {
        return chunkResult == IFilterReturnCode.S_OK
            && (_currentChunk.flags & CHUNKSTATE.CHUNK_TEXT) != 0;
    }

    /// <summary>
    /// Throws <see cref="ObjectDisposedException"/> once the filter has been released.
    /// </summary>
    private void EnsureNotDisposed()
    {
        if (_filter == null)
        {
            throw new ObjectDisposedException(GetType().Name);
        }
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user