Paperyard
ppyrd.scanner.php
Go to the documentation of this file.
1 <?php
2  require_once('dbHandler.php');
3  require_once('ppyrd.base.php');
4 
5 
/**
 * Takes PDFs and runs OCRmyPDF on them.
 *
 * The OCR result is written to /data/inbox/; the input file is afterwards
 * moved to /data/scan/archive/ (result found) or /data/scan/error/ (no result).
 */
class pdfScanner extends ppyrd
{
    /**
     * Constructor taking care of setup.
     *
     * @param string $pdf file name of the PDF to process; run() uses it both
     *                    as the input path (relative to the current working
     *                    directory) and as the file name in the target folders
     * @param object $db  database handler; must provide getConfigValue()
     */
    public function __construct($pdf, $db)
    {
        $this->pdf = $pdf;
        $this->db = $db;
        // the OCR command line is read from the configuration, not hard-coded
        $this->tesseractCommand = $this->db->getConfigValue('tesseractCommand');
    }

    /**
     * Function executing main logic: OCR the PDF, fix ownership of the result
     * and move the input file to the archive or error folder.
     *
     * Only one OCR run may be active at a time (cron usage plus large PDFs can
     * otherwise lead to overlapping runs). This is enforced with an exclusive,
     * non-blocking lock on /tmp/ppyrdOcrMyPdf.txt: if another process holds the
     * lock, the file is left untouched and a message is printed.
     *
     * @return void
     */
    public function run()
    {
        $this->output("executing on " . $this->pdf);

        $lockFile = '/tmp/ppyrdOcrMyPdf.txt';
        $fp = fopen($lockFile, 'w+');
        if ($fp === false) {
            // without a lock handle we cannot guarantee exclusive OCR runs
            $this->output("could not open lock file " . $lockFile . " - skipping " . $this->pdf);
            return;
        }

        // LOCK_NB makes flock() return immediately instead of waiting, so the
        // "still running" branch below is actually reachable
        if (flock($fp, LOCK_EX | LOCK_NB)) {
            $inboxPath = '/data/inbox/' . $this->pdf;

            // every path is escaped exactly once; file names may contain
            // spaces, quotes or other shell metacharacters
            $source = escapeshellarg($this->pdf);
            $inbox = escapeshellarg($inboxPath);

            // running OCRmyPDF (the configured command may carry its own options)
            exec($this->tesseractCommand . ' ' . $source . ' ' . $inbox);

            if (file_exists($inboxPath)) {
                // fixing user permissions since OCR is run as www-data:
                // the result gets the same owner:group as the input file
                $this->output("fixing permissions");
                exec('chown "$(stat -c \'%u:%g\' ' . $source . ')" ' . $inbox);

                $this->output("found ok OCR - moving input to archive");
                exec('mv --backup=numbered ' . $source . ' '
                    . escapeshellarg('/data/scan/archive/' . $this->pdf));
            } else {
                $this->output("did not find ok OCR - moving input to error");
                exec('mv --backup=numbered ' . $source . ' '
                    . escapeshellarg('/data/scan/error/' . $this->pdf));
            }

            flock($fp, LOCK_UN);
        } else {
            // this will be echoed in case OCRmyPDF is still running
            $this->output("OcrMyPdf still running - cannot interfere with it ... if this persists too long check " . $lockFile . " and delete");
        }

        // closing the handle again - so other instances can be started
        fclose($fp);
    }
}
61 
62 // main program
63 
$db = new dbHandler();
$ppyrd = new ppyrd($db);

// NOTE(review): presumably verifies whether the script runs from the CLI or
// via the webserver - implemented in ppyrd.base.php, confirm there
$ppyrd->checkCliVsWebserver();

// checking if any new PDFs need to be OCRed
$pdfs = array();
if (chdir("/data/scan")) {
    // glob() may return false on error; treat that as "nothing to do"
    $found = glob("*.pdf");
    if ($found !== false) {
        $pdfs = $found;
    }
} else {
    // never fall through to globbing whatever directory we happen to be in
    error_log("ppyrd.scanner: could not change into /data/scan - no PDFs processed");
}

// loop all pdfs
foreach ($pdfs as $pdf) {
    $scanner = new pdfScanner($pdf, $db);
    $scanner->run();
}

$db->close();
90 
91 ?>
handling database connection and queries
Definition: dbHandler.php:15
takes PDFs and runs OCRmyPDF on them
output($string, $debug=0)
outputs string
Definition: ppyrd.base.php:78
__construct($pdf, $db)
constructor taking care of setup
$pdfs
run()
function executing main logic
$ppyrd