# Read a PDF with Python and display its text in the console.
# coding=utf-8
import curses
#pip install pdfminer.six
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from io import StringIO
def convert_pdf_to_txt(path):
    """Extract the text of a PDF file using pdfminer.

    Args:
        path: Filesystem path of the PDF file to read.

    Returns:
        The text of all processed pages concatenated into one string.

    Raises:
        OSError: If the file at ``path`` cannot be opened.
    """
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    device = TextConverter(rsrcmgr, retstr, codec='utf-8', laparams=LAParams())
    try:
        # The with-block guarantees the file handle is released even when
        # pdfminer raises while parsing a page.
        with open(path, 'rb') as fp:
            interpreter = PDFPageInterpreter(rsrcmgr, device)
            # An empty page-number set and maxpages=0 place no restriction on
            # which pages are read; the empty password is for unencrypted PDFs.
            pages = PDFPage.get_pages(
                fp,
                set(),
                maxpages=0,
                password="",
                caching=True,
                check_extractable=True,
            )
            for page in pages:
                interpreter.process_page(page)
        # Read the buffer before the finally clause closes it.
        return retstr.getvalue()
    finally:
        device.close()
        retstr.close()
def _clamp_index(index, count):
    """Return index limited to the range 0..count-1 (0 when count is 0)."""
    return max(0, min(index, count - 1))


def main(lines):
    """Show one line of text at a time in a curses screen.

    Key bindings:
        UP / DOWN     move one line backward / forward
        LEFT / RIGHT  move ten lines backward / forward
        q             quit

    Args:
        lines: Sequence of strings to browse. May be empty, in which case
            the arrow keys do nothing.
    """
    # get the curses screen window
    screen = curses.initscr()
    try:
        # turn off input echoing
        curses.noecho()
        # respond to keys immediately (don't wait for enter)
        curses.cbreak()
        # map arrow keys to special values
        screen.keypad(True)

        # How far each navigation key moves the current line index.
        steps = {
            curses.KEY_RIGHT: 10,
            curses.KEY_LEFT: -10,
            curses.KEY_UP: -1,
            curses.KEY_DOWN: 1,
        }
        start = 0
        screen.addstr(0, 0, 'ready')
        while True:
            char = screen.getch()
            if char == ord('q'):
                break
            step = steps.get(char)
            if step is None or not lines:
                # Unbound key, or nothing to display.
                continue
            # Clamp so the index never reaches len(lines); the previous
            # "start > end" check let start == end through and crashed.
            start = _clamp_index(start + step, len(lines))
            screen.clear()
            try:
                screen.addstr(0, 0, lines[start])
            except curses.error:
                # addstr raises when the text runs past the bottom-right
                # corner of the window; whatever fits has already been drawn.
                pass
            screen.refresh()
    finally:
        # shut down cleanly, restoring the terminal even if setup failed
        curses.nocbreak()
        screen.keypad(False)
        curses.echo()
        curses.endwin()
if __name__ == "__main__":
    import sys

    # Take the PDF path from the first command-line argument when given;
    # otherwise fall back to the original sample location.
    pdf_path = sys.argv[1] if len(sys.argv) > 1 else 'c:/test/d_to_m.pdf'
    s = convert_pdf_to_txt(pdf_path)
    lines = s.split("\n")
    print(len(lines))
    # Build a new list instead of calling lines.remove() while iterating:
    # removing during iteration skips the element after each removal, so
    # runs of consecutive blank lines were only partially dropped.
    lines = [line for line in lines if line.strip()]
    print(len(lines))
    main(lines)