Paperyard
ppyrd.sorter.php
Go to the documentation of this file.
1 <?php
2  require_once('dbHandler.php');
3  require_once('ppyrd.base.php');
4 
5 
/**
 * Sorts a single PDF into an archive folder based on the parts of its file name.
 *
 * The file name is split according to the configured `newFilenameStructure`
 * template; the extracted parts are matched against the active archive rules
 * and the file is moved into the folder of the first rule that applies.
 *
 * NOTE(review): the parent constructor is not called from here - confirm that
 * ppyrd does not rely on it for initialisation.
 */
class pdfSorter extends ppyrd {

    /**
     * @param string $pdf file name of the PDF; used as-is as the source path of the move
     * @param object $db  database handler; getConfigValue(), getActiveArchiveRules()
     *                    and writeLog() are called on it
     */
    public function __construct($pdf, $db)
    {
        $this->pdf = $pdf;

        // db handler used to read the configuration / rules and to write the log
        $this->db = $db;
    }

    /**
     * Extracts date, company, recipient, subject, amount and tags from the file name.
     *
     * The position of each part is taken from the `newFilenameStructure` config
     * value. Parts that the template or the file name do not contain end up as
     * empty strings instead of raising "undefined index" errors.
     *
     * @return void
     */
    public function splitUpFilename()
    {
        $this->output("working on: " . $this->pdf);

        // the template tells us at which position each part (date, company, subject, ...) sits
        $newFilenameStructure = (string) $this->db->getConfigValue('newFilenameStructure');

        // closing brackets carry no positional information, so they are dropped before splitting
        $unwanted = array(')', ']');
        $separators = "/ - | \(| \[| \-\- /";
        $templateName = str_replace($unwanted, '', $newFilenameStructure);

        // placeholder name => index of the corresponding part in a split file name
        $templateParts = array_flip(preg_split($separators, $templateName));

        // the actual file name is split the same way so that the indexes line up
        $tmpName = str_replace($unwanted, '', $this->pdf);
        $filenameParts = preg_split($separators, $tmpName);

        $this->date = $this->partFor($filenameParts, $templateParts, 'ddatum');

        // parse the date once instead of once per component
        $timestamp = strtotime($this->date);
        if ($timestamp === false) {
            // date() would silently treat a failed parse as timestamp 0; that
            // fallback is kept, but it is now visible in the output
            $this->output("could not parse date: " . $this->date);
            $timestamp = 0;
        }
        $this->year = date('Y', $timestamp);
        $this->month = date('m', $timestamp);
        $this->day = date('d', $timestamp);

        $this->company = $this->partFor($filenameParts, $templateParts, 'ffirma');
        $this->recipient = $this->partFor($filenameParts, $templateParts, 'wwer');
        $this->subject = $this->partFor($filenameParts, $templateParts, 'bbetreff');
        $this->amount = $this->partFor($filenameParts, $templateParts, 'bbetrag');

        // collects every "[...]" group that consists of word characters only and
        // joins them (brackets included); a group containing anything else,
        // e.g. a comma, is not picked up
        // @todo - needs to be least greedy ...
        preg_match_all('/\[(\d|\w)*\]/', $this->pdf, $tags);
        $this->tags = implode('', $tags[0]);

        $this->output("date: " . $this->date);
        $this->output("year: " . $this->year);
        $this->output("month: " . $this->month);
        $this->output("day: " . $this->day);

        $this->output("company: " . $this->company);
        $this->output("recipient: " . $this->recipient);
        $this->output("subject: " . $this->subject);
        $this->output("amount: " . $this->amount);
        $this->output("tags: " . $this->tags);
    }

    /**
     * Applies the active archive rules to the file.
     *
     * A rule matches when its company, subject, recipient and tags patterns
     * (fnmatch wildcards) all match the values extracted from the file name.
     * The file is moved into the folder of the first matching rule whose move
     * succeeds; processing stops there because the source file no longer
     * exists afterwards.
     *
     * @return void
     */
    public function checkRules()
    {
        $rules = $this->db->getActiveArchiveRules();
        if (!$rules) {
            // nothing to iterate over - presumably a failed query, TODO confirm against dbHandler
            return;
        }

        while ($row = $rules->fetchArray()) {
            // * is the wild card like in file names
            $match = fnmatch($row['company'], $this->company)
                && fnmatch($row['subject'], $this->subject)
                && fnmatch($row['recipient'], $this->recipient)
                && fnmatch($row['tags'], $this->tags);

            if (!$match) {
                continue;
            }

            // resolve the placeholders that may be used in the target folder
            $toFolder = str_replace(
                array('[year]', '[month]', '[day]', '[recipient]'),
                array($this->year, $this->month, $this->day, $this->recipient),
                $row['toFolder']
            );

            // exactly one trailing slash
            $toFolder = rtrim($toFolder, '/') . '/';

            // create the folder (including parents) without going through a shell;
            // the second is_dir() covers a folder created concurrently
            if (!is_dir($toFolder) && !mkdir($toFolder, 0777, true) && !is_dir($toFolder)) {
                $this->output("could not create folder: " . $toFolder);
                continue;
            }

            // mv is kept for its numbered backups of already existing targets;
            // arguments are quoted and "--" stops a leading "-" in the file
            // name from being read as an option
            $command = 'mv --backup=numbered -- '
                . self::quoteShellArg($this->pdf) . ' '
                . self::quoteShellArg($toFolder . $this->pdf);

            $mvOutput = array();
            $exitCode = 0;
            exec($command, $mvOutput, $exitCode);

            if ($exitCode !== 0) {
                $this->output("could not move file to: " . $toFolder);
                continue;
            }

            $this->db->writeLog($this->pdf, $this->pdf, "", "Moved file to: " . $toFolder);

            // the file is gone from the working directory - further rules cannot apply
            return;
        }
    }

    /**
     * Runs the main process: parse the file name, then apply the archive rules.
     *
     * @return void
     */
    public function run()
    {
        // process the file name first
        $this->splitUpFilename();

        // then see if there is any rule to process
        $this->checkRules();
    }

    /**
     * Returns the file name part that belongs to a template placeholder.
     *
     * @param array  $filenameParts parts of the split file name
     * @param array  $templateParts placeholder name => part index
     * @param string $placeholder   placeholder name, e.g. 'ddatum'
     * @return string the part, or '' when the placeholder or the part is missing
     */
    private function partFor(array $filenameParts, array $templateParts, $placeholder)
    {
        if (!isset($templateParts[$placeholder])) {
            return '';
        }

        $index = $templateParts[$placeholder];

        return isset($filenameParts[$index]) ? $filenameParts[$index] : '';
    }

    /**
     * Quotes a string as one POSIX shell argument.
     *
     * Done by hand rather than with escapeshellarg() because that function can
     * drop non-ASCII bytes depending on the process locale, which would corrupt
     * file names containing e.g. umlauts.
     *
     * @param string $argument raw argument
     * @return string single-quoted argument, safe to append to a command line
     */
    private static function quoteShellArg($argument)
    {
        // inside single quotes everything is literal; a quote itself is written
        // as: close quote, escaped quote, reopen quote
        return "'" . str_replace("'", "'\\''", $argument) . "'";
    }
}
139 
140 
// main program
$ppyrd = new ppyrd($db);

$ppyrd->output("starting paperyard");

$ppyrd->checkCliVsWebserver();

// move confirmed files so they will be sorted
$ppyrd->output("checking for confirmed files ...");

// named regex to get the tags of a file name; the dot of the extension is
// escaped so that only a literal ".pdf" suffix matches
const ATTRIBUTE_PATTERN = '/^(.*?) - (.*?) - (.*?) \((.*?)\) \(EUR(.*?)\) \[(?<tags>.*?)\] -- (.*?)(?:\.pdf)$/';

// collect the documents of both folders
// glob() returns false on error, which must not reach array_merge()
// \TODO import document class
$outbox = glob("/data/outbox/*.pdf") ?: array();
$inbox = glob("/data/inbox/*.pdf") ?: array();
$documents = array_merge($outbox, $inbox);

// a document is confirmed when its comma separated tag list contains "ok";
// confirmed documents are moved to the sort folder
foreach ($documents as $document) {
    $basename = basename($document);

    // file names that do not follow the naming scheme carry no tags to inspect
    if (preg_match(ATTRIBUTE_PATTERN, $basename, $attributes) !== 1) {
        continue;
    }

    $tags = explode(',', $attributes['tags']);
    if (in_array('ok', $tags, true)) {
        if (!rename($document, '/data/sort/' . $basename)) {
            $ppyrd->output("could not move confirmed file: " . $document);
        }
    }
}

// the sorter works on file names relative to the sort folder; if the folder
// cannot be entered nothing is sorted, so that files of some other working
// directory are never touched
if (chdir("/data/sort")) {
    $pdfs = glob("*.pdf") ?: array();
} else {
    $ppyrd->output("could not change into /data/sort");
    $pdfs = array();
}

foreach ($pdfs as $pdf) {
    $sorter = new pdfSorter($pdf, $db);
    $sorter->run();
}

/*******************************************************************************/

$db->close();
205 
206 ?>
$pdfs
$ppyrd
$documents
checkRules()
checks rules
handling database connection and queries
Definition: dbHandler.php:15
const ATTRIBUTE_PATTERN
run()
runs the main process
$outbox
Sorts through PDF documents and puts them into their corresponding folders, etc.
__construct($pdf, $db)
constructor
output($string, $debug=0)
outputs string
Definition: ppyrd.base.php:78
$inbox
splitUpFilename()
Extracts from the file name which parts are the date, the company and the subject.