CSharp: UglyToad.PdfPig in dotnet 8.0

 

/*
 IDE: VS 2022 17.5
 OS: windows 10
 .net: 8.0
  生成PDF文档,从PDF文档中获取文字内容  控制台下测试
 */

// See https://aka.ms/new-console-template for more information

using System;
using System.Collections.Generic;
using System.Linq;
using System.Xml.Linq;
using UglyToad.PdfPig;
using UglyToad.PdfPig.AcroForms;
using UglyToad.PdfPig.AcroForms.Fields;
using UglyToad.PdfPig.Content;
using UglyToad.PdfPig.Outline;
using System.IO;
using UglyToad.PdfPig.Core;
using UglyToad.PdfPig.Fonts.Standard14Fonts;
using UglyToad.PdfPig.Fonts.SystemFonts;
using UglyToad.PdfPig.Writer;
using System.Drawing;
using System.Drawing.Text;



        // Console demo (top-level statements): builds a three-page PDF with PdfPig,
        // writes it to "file.pdf", then prints every word extracted from "1.pdf".
        Console.WriteLine("Hello,CSharp World! Geovin Du,geovindu, 涂聚文\n\t");

        try
        {
            PdfDocumentBuilder builder = new PdfDocumentBuilder();

            // Chinese text must be drawn with a Chinese font (use a font that matches the
            // language of the text). File.ReadAllBytes loads the complete file; a single
            // Stream.Read call is not guaranteed to fill the whole buffer.
            byte[] simSunFontBytes = File.ReadAllBytes("C:\\Windows\\Fonts\\STSONG.TTF");

            // Register the font that supports Chinese.
            PdfDocumentBuilder.AddedFont font = builder.AddTrueTypeFont(simSunFontBytes);

            PdfDocumentBuilder.AddedFont helvetica = builder.AddStandard14Font(Standard14Font.Helvetica);
            PdfDocumentBuilder.AddedFont helveticaBold = builder.AddStandard14Font(Standard14Font.HelveticaBold);

            // Page 1: two lines of Latin text anchored near the top-left corner.
            PdfPageBuilder page = builder.AddPage(PageSize.A4);
            PdfPoint closeToTop = new PdfPoint(15, page.PageSize.Top - 25);

            page.AddText("My first PDF document!", 12, closeToTop, helvetica);
            page.AddText("Hello CSharp World!,Geovin Du!", 10, closeToTop.Translate(0, -15), helveticaBold);

            // Page 2: same anchor point, Latin text only.
            page = builder.AddPage(PageSize.A4);
            page.AddText("geovindu!", 12, closeToTop, helvetica);

            // Page 3: Chinese text drawn with the TrueType font loaded above.
            page = builder.AddPage(PageSize.A4);
            page.AddText("你好,这是一个PDF文档。涂聚文欢迎你!", 12, new PdfPoint(25, 520), font);

            string outputPath = "file.pdf";
            File.WriteAllBytes(outputPath, builder.Build());
            Console.WriteLine("文档生成ok\n\t");

            // Read the text content of a PDF file.
            // NOTE(review): this opens "1.pdf", not the "file.pdf" written above — confirm
            // that reading a separate, pre-existing file is intended.
            string fileout = "1.pdf";
            using (PdfDocument document = PdfDocument.Open(fileout))
            {
                foreach (UglyToad.PdfPig.Content.Page pagedu in document.GetPages())
                {
                    foreach (Word word in pagedu.GetWords())
                    {
                        Console.WriteLine(word.Text);
                    }
                }
            }
            Console.WriteLine("\n\t从PDF文件中读取文字内容ok");
        }
        catch (Exception ex)
        {
            // Top-level handler for the demo: report the failure instead of crashing.
            Console.WriteLine(ex.Message);
        }

  


https://github.com/BobLd/PdfPig/tree/table-extractor-2
https://github.com/kba/hocr-spec
https://github.com/kba/hocrjs

 

Concurrency in .NET
https://github.com/rikace/fConcBook
https://dotnetcurry.com/dotnet/1360/concurrent-programming-dotnet-core
https://www.csharptutorial.net/csharp-concurrency/
https://www.oreilly.com/library/view/concurrency-in-net/9781617292996/
https://blog.christian-schou.dk/blog/concurrency-vs-parallelism-vs-asynchronous/

Concurrency in C++
https://www.codeproject.com/Articles/1271904/Programming-Concurrency-in-Cplusplus-Part-1
https://www.codeproject.com/Articles/1278737/Programming-Concurrency-in-Cplusplus-Part-2
https://www.modernescpp.org/wp-content/uploads/2023/04/Concurrency.pdf
https://www.codeproject.com/Tips/5376066/Solving-Fizz-Buzz-in-Csharp-and-Cplusplus
https://www.classes.cs.uchicago.edu/archive/2013/spring/12300-1/labs/lab6/


Concurrency in Java

 

https://github.com/RadekKoubsky/java-concurrency-in-practice-examples
https://github.com/LeonardoZ/java-concurrency-patterns

Concurrency in Python
https://stackabuse.com/concurrency-in-python/
https://github.com/ro6ley/python-concurrency-example

 

/*
 IDE: VS 2022 17.6
 OS: windows 10 
 .NET 8.0
  FROM https://github.com/BobLd/PdfPig
 https://github.com/UglyToad/PdfPig/wiki/Document-Layout-Analysis
https://github.com/UglyToad/PdfPig/issues/617

 */


namespace ConsoleAppPdfDemo
{



    using UglyToad.PdfPig.Content;
    using UglyToad.PdfPig.Core;
    using UglyToad.PdfPig.Fonts.Standard14Fonts;
    using UglyToad.PdfPig.Writer;
    using UglyToad.PdfPig;
    using UglyToad.PdfPig.DocumentLayoutAnalysis.TableExtractor;
    using System.Diagnostics;
    //using static System.Net.Mime.MediaTypeNames;
    using System.Drawing;
    using System.Net;


    /// <summary>
    /// Console demo for UglyToad.PdfPig: builds a three-page PDF containing Chinese text and a
    /// JPEG image, prints the words extracted from "1.pdf", then opens the generated file with
    /// the shell's default handler.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Converts a length in centimetres to PDF units using 2.54 cm per inch and 72 units per inch.
        /// </summary>
        /// <param name="cm">Length in centimetres.</param>
        /// <returns>The same length expressed in PDF units.</returns>
        private static double CmToPdfUnits(double cm) => cm / 2.54 * 72;

        /// <summary>
        /// Entry point: generates the PDF, reads text back from "1.pdf" and previews the result.
        /// Any exception is caught and its message is written to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Hello,CSharp World! Geovin Du,geovindu, 涂聚文\n\t");

            try
            {
                PdfDocumentBuilder builder = new PdfDocumentBuilder();

                // Chinese text must be drawn with a Chinese font (use a font that matches the
                // language of the text). File.ReadAllBytes loads the complete file; a single
                // Stream.Read call is not guaranteed to fill the whole buffer.
                byte[] simSunFontBytes = File.ReadAllBytes("C:\\Windows\\Fonts\\STSONG.TTF");

                // Register the font that supports Chinese.
                PdfDocumentBuilder.AddedFont font = builder.AddTrueTypeFont(simSunFontBytes);

                // Page 1: two lines of text near the top-left corner plus the logo image.
                PdfPageBuilder page = builder.AddPage(PageSize.A4);
                PdfPoint closeToTop = new PdfPoint(15, page.PageSize.Top - 25);

                page.AddText("My first PDF document!言语成了邀功尽责的功臣,还需要行为每日值班吗?", 12, closeToTop, font);
                page.AddText("Hello CSharp World!,Geovin Du!涂聚文,geovindu", 10, closeToTop.Translate(0, -15), font);

                // Load the JPEG as bytes so that no file handle stays open afterwards.
                string logoPath = Path.Combine(Environment.CurrentDirectory, "images", "logo.jpg");
                double imgX = CmToPdfUnits(2.5);
                double imgY = CmToPdfUnits(14);
                double imgWidth = CmToPdfUnits(16);
                double imgHeight = CmToPdfUnits(12);
                page.AddJpeg(
                    File.ReadAllBytes(logoPath),
                    new PdfRectangle(imgX, imgY, imgX + imgWidth, imgY + imgHeight));

                // Page 2: one text line every 15 units, starting at y = 815 and moving down.
                page = builder.AddPage(PageSize.A4);

                string[] secondPageLines =
                {
                    "geovindu!",
                    "励学篇",
                    "宋  赵恒",
                    "富家不用买良田,书中自有千钟粟。",
                    "安居不用架高堂,书中自有黄金屋。",
                    "出门莫恨无人随,书中车马多如簇。",
                    "娶妻莫恨无良媒,书中自有颜如玉。",
                    "男儿欲遂平生志,五经勤向窗前读。",
                };

                const double firstLineY = 815;
                const double lineSpacing = 15;
                for (int i = 0; i < secondPageLines.Length; i++)
                {
                    page.AddText(secondPageLines[i], 12, new PdfPoint(15, firstLineY - i * lineSpacing), font);
                }

                // Page 3
                page = builder.AddPage(PageSize.A4);
                page.AddText("你好,这是一个PDF文档。涂聚文欢迎你!", 12, new PdfPoint(25, 520), font);

                // The timestamp includes the day ("dd") so files created on different days
                // of the same month do not share a name; the invariant culture keeps the
                // file name independent of the machine's regional settings.
                string timestamp = DateTime.Now.ToString(
                    "yyyyMMddHHmmss",
                    System.Globalization.CultureInfo.InvariantCulture);
                string outputPath = "geovindu" + timestamp + ".pdf";
                File.WriteAllBytes(outputPath, builder.Build());
                Console.WriteLine("文档生成ok\n\t");

                // Read the text content of a PDF file.
                // NOTE(review): this opens "1.pdf", not the file generated above — confirm
                // that reading a separate, pre-existing file is intended.
                string fileout = "1.pdf";
                using (PdfDocument document = PdfDocument.Open(fileout))
                {
                    foreach (UglyToad.PdfPig.Content.Page pagedu in document.GetPages())
                    {
                        foreach (Word word in pagedu.GetWords())
                        {
                            Console.WriteLine(word.Text);
                        }
                    }
                }
                Console.WriteLine("\n\t从PDF文件中读取文字内容ok");

                // Preview the generated file with the shell's default handler.
                // Process.Start returns null when no new process was started (for example
                // when an already-running viewer takes over), so only wait when there is
                // a process to wait for.
                ProcessStartInfo startInfo = new ProcessStartInfo(outputPath)
                {
                    UseShellExecute = true
                };

                using (var viewer = Process.Start(startInfo))
                {
                    viewer?.WaitForExit();
                }
            }
            catch (Exception ex)
            {
                // Top-level handler for the demo: report the failure instead of crashing.
                Console.WriteLine(ex.Message);
            }
        }
    }
}

  

https://github.com/BobLd/PdfPig/tree/table-extractor-2
https://github.com/BobLd/PdfPig/tree/table-extractor

 

posted @ 2024-01-26 18:00  ®Geovin Du Dream Park™  阅读(95)  评论(0)  编辑  收藏  举报