#!/usr/bin/env bash
#
# OCR a PDF into a searchable PDF plus a plain-text dump.
#
# Pipeline:
#   1. pdfseparate  splits ../source.pdf into one PDF per page
#                   (page0001.pdf, page0002.pdf, ...).
#   2. convert      rasterizes each page PDF to <page>.tif at 200 dpi.
#   3. tesseract    OCRs each TIFF (English + Italian); the trailing "pdf"
#                   config makes it write <page>.tif.txt.pdf.
#   4. pdfunite     merges the per-page OCR PDFs, in page order, into out.pdf.
#   5. pdftotext    extracts the text layer of out.pdf into out.txt.
#
# Run from a scratch directory located one level below source.pdf.
# Intermediate files (pageNNNN.pdf, *.tif, *.txt.pdf) are left in the current
# directory. NOTE: page files left over from an earlier run on a longer
# document are not removed and would be picked up again; start from an empty
# directory to be safe.
#
# Requires: pdfseparate, pdfunite, pdftotext (poppler-utils),
#           convert (ImageMagick), tesseract with eng and ita language data.

set -euo pipefail

readonly SOURCE_PDF=../source.pdf
readonly PAGE_PATTERN=page%04d.pdf
readonly MERGED_PDF=out.pdf
readonly MERGED_TXT=out.txt

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

for tool in pdfseparate convert tesseract pdfunite pdftotext; do
  command -v "$tool" >/dev/null || die "required tool not found: $tool"
done

[[ -f "$SOURCE_PDF" ]] || die "input file not found: $SOURCE_PDF"

# 1. One PDF per page.
pdfseparate "$SOURCE_PDF" "$PAGE_PATTERN"

# Unmatched globs expand to nothing instead of the literal pattern.
shopt -s nullglob

# 2 + 3. Rasterize and OCR each page. The glob expands in sorted order, which
# is page order thanks to the zero-padded numbers.
ocr_pages=()
for page in page*.pdf; do
  # Only the files pdfseparate produces; skips e.g. page0001.pdf.tif.txt.pdf
  # left behind by a previous run.
  [[ "$page" =~ ^page[0-9]+\.pdf$ ]] || continue

  tif="${page}.tif"
  convert -quality 100 -density 200 "$page" "$tif"

  # Output base "<tif>.txt" + "pdf" config => tesseract writes "<tif>.txt.pdf".
  tesseract -l eng+ita "$tif" "${tif}.txt" pdf
  ocr_pages+=("${tif}.txt.pdf")
done

(( ${#ocr_pages[@]} > 0 )) || die "no pages were extracted from $SOURCE_PDF"

# 4. Merge the searchable pages; 5. dump their text.
pdfunite "${ocr_pages[@]}" "$MERGED_PDF"
pdftotext "$MERGED_PDF" "$MERGED_TXT"