CSharp: UglyToad.PdfPig in .NET 8.0
/*
 IDE: VS 2022 17.5
 OS: windows 10
 .net: 8.0
 Generates a PDF document and extracts the text content of a PDF document.
 Tested as a console application.
*/

// See https://aka.ms/new-console-template for more information
using System;
using System.Collections.Generic;
using System.Linq;
using System.Xml.Linq;
using UglyToad.PdfPig;
using UglyToad.PdfPig.AcroForms;
using UglyToad.PdfPig.AcroForms.Fields;
using UglyToad.PdfPig.Content;
using UglyToad.PdfPig.Outline;
using System.IO;
using UglyToad.PdfPig.Core;
using UglyToad.PdfPig.Fonts.Standard14Fonts;
using UglyToad.PdfPig.Fonts.SystemFonts;
using UglyToad.PdfPig.Writer;
using System.Drawing;
using System.Drawing.Text;

Console.WriteLine("Hello,CSharp World! Geovin Du,geovindu, 涂聚文\n\t");

try
{
    PdfDocumentBuilder builder = new PdfDocumentBuilder();

    // Chinese text must be written with a font for that language, so the
    // STSONG TrueType file is loaded and embedded.
    // File.ReadAllBytes is used because a single Stream.Read call is allowed
    // to return fewer bytes than requested, which would leave the buffer
    // only partly filled.
    byte[] simSunFontBytes = File.ReadAllBytes("C:\\Windows\\Fonts\\STSONG.TTF");

    // Font used for the Chinese text.
    PdfDocumentBuilder.AddedFont font = builder.AddTrueTypeFont(simSunFontBytes);
    PdfDocumentBuilder.AddedFont helvetica = builder.AddStandard14Font(Standard14Font.Helvetica);
    PdfDocumentBuilder.AddedFont helveticaBold = builder.AddStandard14Font(Standard14Font.HelveticaBold);

    // Page 1: two lines anchored 25 units below the top edge.
    PdfPageBuilder page = builder.AddPage(PageSize.A4);
    PdfPoint closeToTop = new PdfPoint(15, page.PageSize.Top - 25);
    page.AddText("My first PDF document!", 12, closeToTop, helvetica);
    page.AddText("Hello CSharp World!,Geovin Du!", 10, closeToTop.Translate(0, -15), helveticaBold);

    // Page 2: reuses the same anchor point (both pages are A4).
    page = builder.AddPage(PageSize.A4);
    page.AddText("geovindu!", 12, closeToTop, helvetica);

    // Page 3: Chinese text, written with the embedded Chinese font.
    page = builder.AddPage(PageSize.A4);
    page.AddText("你好,这是一个PDF文档。涂聚文欢迎你!", 12, new PdfPoint(25, 520), font);

    string fiel = "file.pdf";
    File.WriteAllBytes(fiel, builder.Build());
    Console.WriteLine("文档生成ok\n\t");

    // Read the words of a PDF file and print one per line.
    // NOTE(review): this opens "1.pdf", not the "file.pdf" written above —
    // confirm that reading a separate, pre-existing file is intended.
    string fileout = "1.pdf";
    using (PdfDocument document = PdfDocument.Open(fileout))
    {
        foreach (UglyToad.PdfPig.Content.Page pagedu in document.GetPages())
        {
            foreach (Word word in pagedu.GetWords())
            {
                Console.WriteLine(word.Text);
            }
        }
    }

    Console.WriteLine("\n\t从PDF文件中读取文字内容ok");
}
catch (Exception ex)
{
    // Demo-level handling: report the failure message and exit normally.
    Console.WriteLine(ex.Message);
}
https://github.com/BobLd/PdfPig/tree/table-extractor-2
https://github.com/kba/hocr-spec
https://github.com/kba/hocrjs
Concurrency in .NET
https://github.com/rikace/fConcBook
https://dotnetcurry.com/dotnet/1360/concurrent-programming-dotnet-core
https://www.csharptutorial.net/csharp-concurrency/
https://www.oreilly.com/library/view/concurrency-in-net/9781617292996/
https://blog.christian-schou.dk/blog/concurrency-vs-parallelism-vs-asynchronous/
Concurrency in C++
https://www.codeproject.com/Articles/1271904/Programming-Concurrency-in-Cplusplus-Part-1
https://www.codeproject.com/Articles/1278737/Programming-Concurrency-in-Cplusplus-Part-2
https://www.modernescpp.org/wp-content/uploads/2023/04/Concurrency.pdf
https://www.codeproject.com/Tips/5376066/Solving-Fizz-Buzz-in-Csharp-and-Cplusplus
https://www.classes.cs.uchicago.edu/archive/2013/spring/12300-1/labs/lab6/
Concurrency in Java
https://github.com/RadekKoubsky/java-concurrency-in-practice-examples
https://github.com/LeonardoZ/java-concurrency-patterns
Concurrency in Python
https://stackabuse.com/concurrency-in-python/
https://github.com/ro6ley/python-concurrency-example
/*
 IDE: VS 2022 17.6
 OS: windows 10
 .NET 8.0
 FROM
 https://github.com/BobLd/PdfPig
 https://github.com/UglyToad/PdfPig/wiki/Document-Layout-Analysis
 https://github.com/UglyToad/PdfPig/issues/617
*/

namespace ConsoleAppPdfDemo
{
    using UglyToad.PdfPig.Content;
    using UglyToad.PdfPig.Core;
    using UglyToad.PdfPig.Fonts.Standard14Fonts;
    using UglyToad.PdfPig.Writer;
    using UglyToad.PdfPig;
    using UglyToad.PdfPig.DocumentLayoutAnalysis.TableExtractor;
    using System.Diagnostics;
    //using static System.Net.Mime.MediaTypeNames;
    using System.Drawing;
    using System.Net;

    /// <summary>
    /// Console demo: builds a three-page PDF (text plus one JPEG), prints the
    /// words of a PDF file, then opens the generated file with the shell.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Converts a length in centimetres to PDF units
        /// (2.54 cm per inch, 72 units per inch).
        /// </summary>
        /// <param name="cm">Length in centimetres.</param>
        /// <returns>The same length expressed in PDF units.</returns>
        private static double CmToPdfUnits(double cm) => cm / 2.54 * 72;

        /// <summary>
        /// Entry point: writes the generated PDF to a timestamped file, dumps
        /// the words of "1.pdf" to the console and previews the generated file.
        /// Any exception is reported by message only.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Hello,CSharp World! Geovin Du,geovindu, 涂聚文\n\t");

            try
            {
                byte[] pdfBytes = BuildDocument();

                // "dd" is part of the stamp so that files produced at the same
                // time of day on different days of a month get distinct names.
                string stamp = DateTime.Now.ToString(
                    "yyyyMMddHHmmss",
                    System.Globalization.CultureInfo.InvariantCulture);
                string fiel = "geovindu" + stamp + ".pdf";
                File.WriteAllBytes(fiel, pdfBytes);
                Console.WriteLine("文档生成ok\n\t");

                // NOTE(review): this reads "1.pdf", not the file written above —
                // confirm that reading a separate, pre-existing file is intended.
                PrintWords("1.pdf");
                Console.WriteLine("\n\t从PDF文件中读取文字内容ok");

                Preview(fiel);
            }
            catch (Exception ex)
            {
                // Demo-level handling: report the failure message and exit normally.
                Console.WriteLine(ex.Message);
            }
        }

        /// <summary>
        /// Builds the three-page demo document and returns its bytes.
        /// </summary>
        /// <returns>The serialized PDF.</returns>
        private static byte[] BuildDocument()
        {
            PdfDocumentBuilder builder = new PdfDocumentBuilder();

            // Chinese text must be written with a font for that language, so
            // the STSONG TrueType file is loaded and embedded.
            // File.ReadAllBytes is used because a single Stream.Read call is
            // allowed to return fewer bytes than requested.
            byte[] simSunFontBytes = File.ReadAllBytes("C:\\Windows\\Fonts\\STSONG.TTF");
            PdfDocumentBuilder.AddedFont font = builder.AddTrueTypeFont(simSunFontBytes);

            // NOTE(review): these two fonts are registered but no text below
            // uses them — they can probably be dropped.
            _ = builder.AddStandard14Font(Standard14Font.Helvetica);
            _ = builder.AddStandard14Font(Standard14Font.HelveticaBold);

            // Page 1: two text lines near the top edge and a JPEG.
            PdfPageBuilder page = builder.AddPage(PageSize.A4);
            PdfPoint closeToTop = new PdfPoint(15, page.PageSize.Top - 25);
            page.AddText("My first PDF document!言语成了邀功尽责的功臣,还需要行为每日值班吗?", 12, closeToTop, font);
            page.AddText("Hello CSharp World!,Geovin Du!涂聚文,geovindu", 10, closeToTop.Translate(0, -15), font);

            // Opened read-only and disposed when this method returns, i.e.
            // only after Build() has produced the document bytes.
            string imagePath = Path.Combine(Environment.CurrentDirectory, "images", "logo.jpg");
            using FileStream imgstream = File.OpenRead(imagePath);
            double imgX = CmToPdfUnits(2.5);
            double imgY = CmToPdfUnits(14);
            double imgWidth = CmToPdfUnits(16);
            double imgHeight = CmToPdfUnits(12);
            page.AddJpeg(imgstream, new PdfRectangle(imgX, imgY, imgX + imgWidth, imgY + imgHeight));

            // Page 2: one line every 15 units, starting at y = 815.
            page = builder.AddPage(PageSize.A4);
            string[] lines =
            {
                "geovindu!",
                "励学篇",
                "宋 赵恒",
                "富家不用买良田,书中自有千钟粟。",
                "安居不用架高堂,书中自有黄金屋。",
                "出门莫恨无人随,书中车马多如簇。",
                "娶妻莫恨无良媒,书中自有颜如玉。",
                "男儿欲遂平生志,五经勤向窗前读。",
            };
            for (int i = 0; i < lines.Length; i++)
            {
                page.AddText(lines[i], 12, new PdfPoint(15, 815 - (15 * i)), font);
            }

            // Page 3: a single Chinese sentence.
            page = builder.AddPage(PageSize.A4);
            page.AddText("你好,这是一个PDF文档。涂聚文欢迎你!", 12, new PdfPoint(25, 520), font);

            return builder.Build();
        }

        /// <summary>
        /// Opens a PDF file and writes every word of every page to the console,
        /// one word per line.
        /// </summary>
        /// <param name="path">Path of the PDF file to read.</param>
        private static void PrintWords(string path)
        {
            using (PdfDocument document = PdfDocument.Open(path))
            {
                foreach (UglyToad.PdfPig.Content.Page pagedu in document.GetPages())
                {
                    foreach (Word word in pagedu.GetWords())
                    {
                        Console.WriteLine(word.Text);
                    }
                }
            }
        }

        /// <summary>
        /// Opens a file through the operating-system shell and waits for the
        /// started process, if any, to exit.
        /// </summary>
        /// <param name="path">Path of the file to open.</param>
        private static void Preview(string path)
        {
            // Process.Start returns null when no new process was started
            // (for example when an already-running viewer is reused); in that
            // case there is nothing to wait for.
            using var viewer = Process.Start(new ProcessStartInfo(path) { UseShellExecute = true });
            viewer?.WaitForExit();
        }
    }
}
https://github.com/BobLd/PdfPig/tree/table-extractor-2
https://github.com/BobLd/PdfPig/tree/table-extractor