PDF 使用心得
最近公司要做一个与 PDF 生成相关的功能,借此机会把生成 PDF 的开源工具大部分都熟悉了一下,希望对刚接触的人能有所帮助。
iTextSharp 是用得最多的,生成 PDF 文件应该也是最快的,同时也是最容易上手的工具,仅仅几行代码就能生成一个标准的
PDF 文件:
// Build a one-image PDF with iTextSharp and write it to "Chap0101c.pdf".
Document document = new Document();
// The output stream lives in a using block so the file handle is released even when
// downloading or adding the image throws before document.Close() is reached.
using (FileStream output = new FileStream("Chap0101c.pdf", FileMode.Create))
{
    // GetInstance attaches a writer to the document; the returned PdfWriter is not needed here.
    PdfWriter.GetInstance(document, output);
    document.Open();
    Image img = Image.GetInstance(@"http://www.google.cn/intl/zh-CN/images/logo_cn.gif");
    img.Alt = "fsfsfsfsfsfsfsF";
    document.Add(img);
    // Closing the document completes the PDF; disposing the stream afterwards is harmless.
    document.Close();
}
当然要先引用 iTextSharp 的 dll。不过源码包里的 sample 都比较旧,好多功能和例子中的写法有很大的出入。具体的功能
可以参照例子,还可以参考 http://www.cnblogs.com/hardrock/category/53568.html,是博客园一位朋友写的,我就不细说了。
PDFBox 是一个 Java 的开源库,不过有 .NET 版本的 dll 可以用,只是用的时候大部分类型还是 Java 的,比如 FileInputStream
就是 java.io 命名空间里的类型。不过用起来也蛮方便的,很容易就能找到 PDF 中的一些 PDF 对象,比如 image、text 等等,
还是比较有用的:
// Parse a PDF with PDFBox (.NET build) and walk the images of every page.
// readFile is the path of the PDF to inspect; it is defined by the surrounding code.
// The file is parsed exactly once: the extra PDDocument.load(readFile) of the earlier
// version opened a second copy of the document that was never used or closed.
PDDocument pdf = null;
FileInputStream pdfStream = null;
try
{
    pdfStream = new FileInputStream(readFile);
    PDFParser pdfParser = new PDFParser(pdfStream);
    pdfParser.parse(); // parse the stream into a document
    pdf = pdfParser.getPDDocument();
    List pages = pdf.getDocumentCatalog().getAllPages();
    for (Iterator iter = pages.iterator(); iter.hasNext(); )
    {
        PDPage page = (PDPage)iter.next();
        PDResources resources = page.getResources();
        if (resources == null)
        {
            // Defensive: skip a page that exposes no resources instead of dereferencing null.
            continue;
        }
        Map images = resources.getImages();
        if (images == null)
        {
            continue;
        }
        for (Iterator imageIter = images.keySet().iterator(); imageIter.hasNext(); )
        {
            string key = (string)imageIter.next();
            PDXObjectImage image = (PDXObjectImage)images.get(key);
            // Suggested output name "<resource key>.<image suffix>"; save or inspect the image here.
            string fname = key + "." + image.getSuffix();
        }
    }
}
catch (java.lang.Exception ex)
{
    Console.WriteLine(ex.getMessage());
}
finally
{
    // Release the parsed document first, then the underlying stream; the nested
    // try/finally guarantees the stream is closed even if closing the document throws.
    try
    {
        if (pdf != null)
        {
            pdf.close();
        }
    }
    finally
    {
        if (pdfStream != null)
        {
            pdfStream.close();
        }
    }
}
如果对 PDF 的结构比较熟悉,可以用它,应该比较容易就能满足你的要求。
最后我介绍一下 PDFsharp。
这是最后真正满足了我们需求的一个 dll。
下面的方法用来获取 PDF 文件中的文本:
/// <summary>
/// Opens the PDF at <c>_fileName</c> and concatenates the operand of every <c>Tj</c>
/// operator that has exactly one <c>CString</c> operand, in page and content order.
/// </summary>
/// <returns>The concatenated operands; an empty string when no such operator is found.</returns>
/// <remarks>
/// Exceptions raised while reading propagate unchanged, with their original stack trace
/// (the former <c>catch { throw ex; }</c> reset it). The document is closed exactly once,
/// and only if it was actually opened.
/// </remarks>
public string ExtractContentForSummary()
{
    // Fully qualified so the block does not depend on a using directive for System.Text.
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    PdfDocument opened = null;
    try
    {
        opened = PdfReader.Open(_fileName);
        document = opened; // keep the shared field in sync, as before
        foreach (PdfPage page in opened.Pages)
        {
            CSequence seq = ContentReader.ReadContent(page);
            for (int idx = 0; idx < seq.Count; idx++)
            {
                COperator op = seq[idx] as COperator;
                if (op == null || op.OpCode.OpCodeName != OpCodeName.Tj || op.Operands.Count != 1)
                {
                    continue;
                }

                // A non-string operand contributes nothing, matching the previous
                // behavior of concatenating a null reference.
                CString s = op.Operands[0] as CString;
                if (s != null)
                {
                    text.Append(s.ToString());
                }
            }
        }
        return text.ToString();
    }
    finally
    {
        // Guarded: when Open itself failed there is nothing to close.
        if (opened != null)
        {
            opened.Close();
        }
    }
}
获得所有的image对象
/// <summary>
/// Collects the image XObjects referenced from the <c>/Resources</c> of every page of
/// <paramref name="document"/>.
/// </summary>
/// <param name="document">The document whose pages are scanned; it is not reassigned here.</param>
/// <returns>
/// The dictionaries whose <c>/Subtype</c> is <c>/Image</c>, in the order they are encountered;
/// an empty array when there are none.
/// </returns>
public PdfDictionary[] GetAllImageFromPDF(ref PdfDocument document)
{
    List<PdfDictionary> images = new List<PdfDictionary>();

    foreach (PdfPage pdfPage in document.Pages)
    {
        // Pages without a resources dictionary cannot reference images.
        PdfDictionary pageResources = pdfPage.Elements.GetDictionary("/Resources");
        if (pageResources == null)
        {
            continue;
        }

        // External objects (images among them) live under /XObject.
        PdfDictionary externalObjects = pageResources.Elements.GetDictionary("/XObject");
        if (externalObjects == null)
        {
            continue;
        }

        foreach (PdfItem entry in externalObjects.Elements.Values)
        {
            // Only indirect references are followed.
            PdfReference indirect = entry as PdfReference;
            if (indirect == null)
            {
                continue;
            }

            PdfDictionary candidate = indirect.Value as PdfDictionary;
            if (candidate == null)
            {
                continue;
            }

            // Keep the object only when it declares itself an image.
            if (candidate.Elements.GetString("/Subtype") == "/Image")
            {
                images.Add(candidate);
            }
        }
    }

    return images.ToArray();
}
替换图片的方法:
/// <summary>
/// Opens the PDF at <c>_fileName</c>, looks for image XObjects whose JPEG comment (as returned
/// by <c>GlobalFunction.GetCommentFromJPG</c>) equals <c>posInfo.Name</c>, replaces the stream
/// of the match with the picture loaded from <paramref name="pictureFile"/>, and saves the
/// document to <c>FileName</c>.
/// </summary>
/// <param name="posInfo">Supplies the placeholder name compared against each image's comment.</param>
/// <param name="pictureFile">Path of the picture whose data replaces the matching image.</param>
/// <remarks>
/// Exceptions propagate unchanged with their original stack trace (the former
/// <c>catch { throw ex; }</c> reset it). The loaded picture is disposed even when replacing
/// or saving fails, and the document is closed only if it was actually opened.
/// NOTE(review): the <c>break</c> leaves only the per-page loop, so later pages are still
/// scanned and a match there saves again - confirm whether stopping after the first
/// replacement was intended.
/// </remarks>
public void PlaceSignature(PositionInfo posInfo, string pictureFile)
{
    PdfDocument opened = null;
    try
    {
        opened = PdfReader.Open(_fileName);
        document = opened; // keep the shared field in sync, as before

        foreach (PdfPage page in opened.Pages)
        {
            PdfDictionary resources = page.Elements.GetDictionary("/Resources");
            if (resources == null)
            {
                continue;
            }

            PdfDictionary xObjects = resources.Elements.GetDictionary("/XObject");
            if (xObjects == null)
            {
                continue;
            }

            foreach (PdfItem item in xObjects.Elements.Values)
            {
                PdfReference reference = item as PdfReference;
                if (reference == null)
                {
                    continue;
                }

                // Only image XObjects can be signature placeholders.
                PdfDictionary xObject = reference.Value as PdfDictionary;
                if (xObject == null || xObject.Elements.GetString("/Subtype") != "/Image")
                {
                    continue;
                }

                string position = GlobalFunction.GetCommentFromJPG(xObject.Stream.Value);
                if (position != posInfo.Name)
                {
                    continue;
                }

                // using guarantees the picture is released even if PdfImage or Save throws.
                using (XImage ximage = XImage.FromFile(pictureFile))
                {
                    PdfImage pdfimage = new PdfImage(opened, ximage);
                    xObject.Stream.Value = pdfimage.Stream.Value;
                    opened.Save(FileName);
                }
                break;
            }
        }
    }
    finally
    {
        // Guarded: when Open itself failed there is nothing to close.
        if (opened != null)
        {
            opened.Close();
        }
    }
}
以上就是大部分人会用到的功能,正好把我用到的方法分享出来,希望对别人有用,也欢迎有更好方法的朋友和我交流。
iTextSharp 是用得最多的,生成 PDF 文件应该也是最快的,同时也是最容易上手的工具,仅仅几行代码就能生成一个标准的
PDF 文件:
// Build a one-image PDF with iTextSharp and write it to "Chap0101c.pdf".
Document document = new Document();
// The output stream lives in a using block so the file handle is released even when
// downloading or adding the image throws before document.Close() is reached.
using (FileStream output = new FileStream("Chap0101c.pdf", FileMode.Create))
{
    // GetInstance attaches a writer to the document; the returned PdfWriter is not needed here.
    PdfWriter.GetInstance(document, output);
    document.Open();
    Image img = Image.GetInstance(@"http://www.google.cn/intl/zh-CN/images/logo_cn.gif");
    img.Alt = "fsfsfsfsfsfsfsF";
    document.Add(img);
    // Closing the document completes the PDF; disposing the stream afterwards is harmless.
    document.Close();
}
当然要先引用 iTextSharp 的 dll。不过源码包里的 sample 都比较旧,好多功能和例子中的写法有很大的出入。具体的功能
可以参照例子,还可以参考 http://www.cnblogs.com/hardrock/category/53568.html,是博客园一位朋友写的,我就不细说了。
PDFBox 是一个 Java 的开源库,不过有 .NET 版本的 dll 可以用,只是用的时候大部分类型还是 Java 的,比如 FileInputStream
就是 java.io 命名空间里的类型。不过用起来也蛮方便的,很容易就能找到 PDF 中的一些 PDF 对象,比如 image、text 等等,
还是比较有用的:
// Parse a PDF with PDFBox (.NET build) and walk the images of every page.
// readFile is the path of the PDF to inspect; it is defined by the surrounding code.
// The file is parsed exactly once: the extra PDDocument.load(readFile) of the earlier
// version opened a second copy of the document that was never used or closed.
PDDocument pdf = null;
FileInputStream pdfStream = null;
try
{
    pdfStream = new FileInputStream(readFile);
    PDFParser pdfParser = new PDFParser(pdfStream);
    pdfParser.parse(); // parse the stream into a document
    pdf = pdfParser.getPDDocument();
    List pages = pdf.getDocumentCatalog().getAllPages();
    for (Iterator iter = pages.iterator(); iter.hasNext(); )
    {
        PDPage page = (PDPage)iter.next();
        PDResources resources = page.getResources();
        if (resources == null)
        {
            // Defensive: skip a page that exposes no resources instead of dereferencing null.
            continue;
        }
        Map images = resources.getImages();
        if (images == null)
        {
            continue;
        }
        for (Iterator imageIter = images.keySet().iterator(); imageIter.hasNext(); )
        {
            string key = (string)imageIter.next();
            PDXObjectImage image = (PDXObjectImage)images.get(key);
            // Suggested output name "<resource key>.<image suffix>"; save or inspect the image here.
            string fname = key + "." + image.getSuffix();
        }
    }
}
catch (java.lang.Exception ex)
{
    Console.WriteLine(ex.getMessage());
}
finally
{
    // Release the parsed document first, then the underlying stream; the nested
    // try/finally guarantees the stream is closed even if closing the document throws.
    try
    {
        if (pdf != null)
        {
            pdf.close();
        }
    }
    finally
    {
        if (pdfStream != null)
        {
            pdfStream.close();
        }
    }
}
如果对 PDF 的结构比较熟悉,可以用它,应该比较容易就能满足你的要求。
最后我介绍一下 PDFsharp。
这是最后真正满足了我们需求的一个 dll。
下面的方法用来获取 PDF 文件中的文本:
/// <summary>
/// Opens the PDF at <c>_fileName</c> and concatenates the operand of every <c>Tj</c>
/// operator that has exactly one <c>CString</c> operand, in page and content order.
/// </summary>
/// <returns>The concatenated operands; an empty string when no such operator is found.</returns>
/// <remarks>
/// Exceptions raised while reading propagate unchanged, with their original stack trace
/// (the former <c>catch { throw ex; }</c> reset it). The document is closed exactly once,
/// and only if it was actually opened.
/// </remarks>
public string ExtractContentForSummary()
{
    // Fully qualified so the block does not depend on a using directive for System.Text.
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    PdfDocument opened = null;
    try
    {
        opened = PdfReader.Open(_fileName);
        document = opened; // keep the shared field in sync, as before
        foreach (PdfPage page in opened.Pages)
        {
            CSequence seq = ContentReader.ReadContent(page);
            for (int idx = 0; idx < seq.Count; idx++)
            {
                COperator op = seq[idx] as COperator;
                if (op == null || op.OpCode.OpCodeName != OpCodeName.Tj || op.Operands.Count != 1)
                {
                    continue;
                }

                // A non-string operand contributes nothing, matching the previous
                // behavior of concatenating a null reference.
                CString s = op.Operands[0] as CString;
                if (s != null)
                {
                    text.Append(s.ToString());
                }
            }
        }
        return text.ToString();
    }
    finally
    {
        // Guarded: when Open itself failed there is nothing to close.
        if (opened != null)
        {
            opened.Close();
        }
    }
}
获得所有的image对象
/// <summary>
/// Collects the image XObjects referenced from the <c>/Resources</c> of every page of
/// <paramref name="document"/>.
/// </summary>
/// <param name="document">The document whose pages are scanned; it is not reassigned here.</param>
/// <returns>
/// The dictionaries whose <c>/Subtype</c> is <c>/Image</c>, in the order they are encountered;
/// an empty array when there are none.
/// </returns>
public PdfDictionary[] GetAllImageFromPDF(ref PdfDocument document)
{
    List<PdfDictionary> images = new List<PdfDictionary>();

    foreach (PdfPage pdfPage in document.Pages)
    {
        // Pages without a resources dictionary cannot reference images.
        PdfDictionary pageResources = pdfPage.Elements.GetDictionary("/Resources");
        if (pageResources == null)
        {
            continue;
        }

        // External objects (images among them) live under /XObject.
        PdfDictionary externalObjects = pageResources.Elements.GetDictionary("/XObject");
        if (externalObjects == null)
        {
            continue;
        }

        foreach (PdfItem entry in externalObjects.Elements.Values)
        {
            // Only indirect references are followed.
            PdfReference indirect = entry as PdfReference;
            if (indirect == null)
            {
                continue;
            }

            PdfDictionary candidate = indirect.Value as PdfDictionary;
            if (candidate == null)
            {
                continue;
            }

            // Keep the object only when it declares itself an image.
            if (candidate.Elements.GetString("/Subtype") == "/Image")
            {
                images.Add(candidate);
            }
        }
    }

    return images.ToArray();
}
替换图片的方法:
/// <summary>
/// Opens the PDF at <c>_fileName</c>, looks for image XObjects whose JPEG comment (as returned
/// by <c>GlobalFunction.GetCommentFromJPG</c>) equals <c>posInfo.Name</c>, replaces the stream
/// of the match with the picture loaded from <paramref name="pictureFile"/>, and saves the
/// document to <c>FileName</c>.
/// </summary>
/// <param name="posInfo">Supplies the placeholder name compared against each image's comment.</param>
/// <param name="pictureFile">Path of the picture whose data replaces the matching image.</param>
/// <remarks>
/// Exceptions propagate unchanged with their original stack trace (the former
/// <c>catch { throw ex; }</c> reset it). The loaded picture is disposed even when replacing
/// or saving fails, and the document is closed only if it was actually opened.
/// NOTE(review): the <c>break</c> leaves only the per-page loop, so later pages are still
/// scanned and a match there saves again - confirm whether stopping after the first
/// replacement was intended.
/// </remarks>
public void PlaceSignature(PositionInfo posInfo, string pictureFile)
{
    PdfDocument opened = null;
    try
    {
        opened = PdfReader.Open(_fileName);
        document = opened; // keep the shared field in sync, as before

        foreach (PdfPage page in opened.Pages)
        {
            PdfDictionary resources = page.Elements.GetDictionary("/Resources");
            if (resources == null)
            {
                continue;
            }

            PdfDictionary xObjects = resources.Elements.GetDictionary("/XObject");
            if (xObjects == null)
            {
                continue;
            }

            foreach (PdfItem item in xObjects.Elements.Values)
            {
                PdfReference reference = item as PdfReference;
                if (reference == null)
                {
                    continue;
                }

                // Only image XObjects can be signature placeholders.
                PdfDictionary xObject = reference.Value as PdfDictionary;
                if (xObject == null || xObject.Elements.GetString("/Subtype") != "/Image")
                {
                    continue;
                }

                string position = GlobalFunction.GetCommentFromJPG(xObject.Stream.Value);
                if (position != posInfo.Name)
                {
                    continue;
                }

                // using guarantees the picture is released even if PdfImage or Save throws.
                using (XImage ximage = XImage.FromFile(pictureFile))
                {
                    PdfImage pdfimage = new PdfImage(opened, ximage);
                    xObject.Stream.Value = pdfimage.Stream.Value;
                    opened.Save(FileName);
                }
                break;
            }
        }
    }
    finally
    {
        // Guarded: when Open itself failed there is nothing to close.
        if (opened != null)
        {
            opened.Close();
        }
    }
}
以上就是大部分人会用到的功能,正好把我用到的方法分享出来,希望对别人有用,也欢迎有更好方法的朋友和我交流。