2 require_once(
'dbHandler.php');
3 require_once(
'ppyrd.base.php');
40 if (!preg_match(
'(ddatum|ffirma|bbetreff|wwer|bbetrag)',$pdf) && !preg_match(
'(^\d{8}\s\-)',$pdf)) {
42 $this->newName=$this->db->getConfigValue(
'newFilenameStructure');
44 if (
$db->getConfigValue(
'appendOldFilename')==1) {
45 $this->newName .= $pdf;
47 $this->newName .=
".pdf";
56 $this->companyName =
"";
57 $this->subjectName =
"";
65 $this->companyMatchRating = $this->db->getConfigValue(
"companyMatchRating");
68 $this->subjectMatchRating = $this->db->getConfigValue(
"subjectMatchRating");
70 $this->dateRegEx = $this->db->getConfigValue(
"dateRegEx");
83 exec(
'pdftotext -layout "' . $pdf .
'" -', $this->content);
94 $this->log .= $str .
"\n";
105 self::addtolog(
"Date found $item");
106 if (strpos($item,
"im") !==
false) {
107 $item =str_replace(
"im ",
"",$item);
108 $item = date(
"Ymt", strtotime($item));
110 $item = date(
"Ymd", strtotime($item));
124 foreach ($dates as $date) {
125 if ($date<=date(
'Ymd'))
140 $this->content = implode(
" ", $this->content);
143 $this->content = strtolower($this->content);
146 $this->content = preg_replace($this->db->getConfigValue(
'stripCharactersFromContent'),
" ", $this->content);
149 $this->content = preg_replace(
"/\s\s+/",
" ", $this->content);
164 $this->
addToLog(
'LOOKING FOR DATES');
167 preg_match_all ($this->dateRegEx, $this->content, $dates);
170 $dates = array_unique($dates[0]);
173 array_walk($dates,
'self::toDate');
174 $dates = array_unique($dates);
180 $this->newName = str_replace(
"ddatum",$this->newDate, $this->newName);
192 $results = $this->db->getPersonalVariables();
195 while ($row = $results->fetchArray()) {
196 $searchTerm= str_replace($row[
'variableName'], $row[
'replaceWith'], $searchTerm);
213 $results = $this->db->getActiveSenders();
215 $tmpMatchedCompanyTags= array();
218 while ($row = $results->fetchArray()) {
223 if (strpos($row[
'foundWords'],
",")===
false) {
228 if (substr_count($this->content, $searchTerms)>0) {
229 @$company[$row[
'fileCompany']] += $row[
'companyScore'];
231 $tmpMatchedCompanyTags[$row[
'fileCompany']][]=$row[
'tags'];
241 $split = explode(
',', strtolower($row[
'foundWords']));
245 foreach ($split as $value) {
252 $cfound = substr_count($this->content, $value);
264 @$company[$row[
'fileCompany']] += $row[
'companyScore'];
266 $tmpMatchedCompanyTags[$row[
'fileCompany']][]=$row[
'tags'];
269 $this->
addToLog(
'"' . $row[
'foundWords'] .
'" ' .
" found - " . $row[
'companyScore'] .
" points for company " . $row[
'fileCompany']);
283 if (isset($company[key($company)])) {
284 $companyMatchRating = $company[key($company)];
285 $this->companyName = key($company);
286 $this->matchedCompanyTags = $tmpMatchedCompanyTags[$this->companyName];
289 $this->
output(
"company: " . $this->companyName .
" scored " . $companyMatchRating);
291 if ($companyMatchRating >= $this->companyMatchRating) {
292 $this->newName = str_replace(
"ffirma",$this->companyName, $this->newName);
306 preg_match_all($this->db->getConfigValue(
'matchPriceRegex'), $this->content, $results);
309 $prices = array_values($results[0]);
312 $prices = preg_replace(
"/[^0-9,.]/",
"", $prices);
314 foreach ($prices as $price) {
315 $price = floatval(str_replace(
',',
'.',str_replace(
'.',
'', $price)));
316 if ($price > $maxprice) $maxprice = $price;
320 $this->price=number_format($maxprice,2,
",",
".");
322 $this->newName = str_replace(
"bbetrag",
"EUR".$this->price, $this->newName);
324 $this->
output(
"amount: EUR" . $this->price);
334 $results = $this->db->getActiveSubjects();
337 $tmpMatchedSubjectTags = array();
340 while ($row = $results->fetchArray()) {
344 @$tmpFoundCompany = trim($row[
'foundCompany']);
345 if ($tmpFoundCompany== $this->companyName || empty($tmpFoundCompany)) {
351 if (strpos($row[
'foundWords'],
",")===
false) {
355 if (substr_count($this->content, $searchTerm)>0) {
356 @$subject[$row[
'fileSubject']] += $row[
'subjectScore'];
359 $tmpMatchedSubjectTags[$row[
'fileSubject']][]=$row[
'tags'];
368 $split = explode(
',', strtolower($row[
'foundWords']));
372 foreach ($split as $value) {
379 $cfound = substr_count($this->content, $value);
391 @$subject[$row[
'fileSubject']] += $row[
'subjectScore'];
393 $tmpMatchedSubjectTags[$row[
'fileSubject']][]=$row[
'tags'];
395 $this->
addToLog(
'"' . $row[
'foundWords'] .
'" ' .
" found - " . $row[
'subjectScore'] .
" points for subject " . $row[
'fileSubject']);
409 @$subjectMatchRating = $subject[key($subject)];
410 $this->subjectName = key($subject);
411 @$this->matchedSubjectTags = $tmpMatchedSubjectTags[$this->subjectName];
414 $this->
output(
"subject: " . $this->subjectName .
" scored " . $subjectMatchRating);
416 if ($subjectMatchRating >= $this->subjectMatchRating) {
417 $this->newName = str_replace(
"bbetreff",$this->subjectName, $this->newName);
434 $results = $this->db->getActiveRecipients();
435 $recipients = array();
438 while ($row = $results->fetchArray()) {
439 $cfound = substr_count($this->content, strtolower($row[
'recipientName']));
440 $this->
output(
"look for " . $row[
'recipientName'] .
" found $cfound", 1);
441 @$recipients[$row[
'shortNameForFile']] += $cfound;
448 foreach ($recipients as $name => $score) {
449 if ($score == 0) unset($recipients[$name]);
454 $recipients = implode(
',',array_flip($recipients));
457 if (!empty($recipients))
458 $this->newName = str_replace(
"wwer",$recipients, $this->newName);
466 @$alltags = array_merge($this->matchedCompanyTags, $this->matchedSubjectTags);
469 @$tags = explode(
',',join(
",", $alltags));
473 foreach ($tags as $tag) {
476 $cleantags[] =
"[$tag]";
480 if (is_array($cleantags)) {
481 $cleantags = array_unique($cleantags);
487 $this->tags=implode($cleantags);
489 $this->
output(
"tags: " . $this->tags);
492 $this->
output(
"tags: no tags to assign");
496 if (!empty($this->tags))
497 $this->newName = str_replace(
"[nt]",$this->tags, $this->newName);
535 if (!preg_match(
'(ddatum|ffirma|bbetreff|wwer|bbetrag)',$this->newName)) {
536 exec(
'mv --backup=numbered "' . $this->oldName .
'" "../outbox/' . $this->newName .
'"');
540 if ($this->oldName != $this->newName) {
541 exec(
'mv --backup=numbered "' . $this->oldName .
'" "' . $this->newName .
'"');
545 $this->
output(
"new name: " . $this->newName);
549 $this->db->writeLog($this->oldName, $this->newName, $this->content, $this->log);
564 $ppyrd->output(
"starting paperyard");
571 $ppyrd->checkCliVsWebserver();
576 chdir(
"/data/inbox");
580 foreach(
$pdfs as $pdf){
getTextFromPdf($pdf)
function executes pdftotext to extract text from file
addTags()
function adds tags once company and subject are correctly matched
matchSenders()
reads rulesets from database and executes accordingly
handling database connection and queries
run()
main function calling relevant process steps to identify document
toDate(&$item, $key)
converts a text string to a date. Used as the array_walk callback in matchDates
__construct($pdf, $db)
constructor for the class
matchSubjects()
matching subject
matchRecipients()
reads recipient list from database and tries to match in text
cleanContent()
takes the PDF content and cleans it up
matchDates()
looks for dates in the content of the file using a regular expression
output($string, $debug=0)
outputs string
replacePersonalVariables($searchTerm)
replaces the personal variables within a string
closestDateToToday($dates)
takes an array of dates and returns the closest one before today (since Paper documents have dates in...
takes care of the correct naming of files
matchPrice()
checks if there is a price in the text