takes care of the correct naming of files
More...
takes care of the correct naming of files
Definition at line 16 of file ppyrd.namer.php.
pdfNamer::__construct |
( |
|
$pdf, |
|
|
|
$db |
|
) |
| |
constructor for the class
- Parameters
-
string | $pdf | with file name to process |
object | $db | database connection object (used to read configuration values) |
- Returns
- none
- Bug:
- This still checks for a date at the start of the file name, but that position may change depending on the configured value.
Definition at line 24 of file ppyrd.namer.php.
40 if (!preg_match(
'(ddatum|ffirma|bbetreff|wwer|bbetrag)',$pdf) && !preg_match(
'(^\d{8}\s\-)',$pdf)) {
42 $this->newName=$this->db->getConfigValue(
'newFilenameStructure');
44 if (
$db->getConfigValue(
'appendOldFilename')==1) {
45 $this->newName .= $pdf;
47 $this->newName .=
".pdf";
56 $this->companyName =
"";
57 $this->subjectName =
"";
65 $this->companyMatchRating = $this->db->getConfigValue(
"companyMatchRating");
68 $this->subjectMatchRating = $this->db->getConfigValue(
"subjectMatchRating");
70 $this->dateRegEx = $this->db->getConfigValue(
"dateRegEx");
getTextFromPdf($pdf)
function executes pdftotext to extract text from file
function adds tags once company and subject are correctly matched
Definition at line 464 of file ppyrd.namer.php.
466 @$alltags = array_merge($this->matchedCompanyTags, $this->matchedSubjectTags);
469 @$tags = explode(
',',join(
",", $alltags));
473 foreach ($tags as $tag) {
476 $cleantags[] =
"[$tag]";
480 if (is_array($cleantags)) {
481 $cleantags = array_unique($cleantags);
487 $this->tags=implode($cleantags);
489 $this->
output(
"tags: " . $this->tags);
492 $this->
output(
"tags: no tags to assign");
496 if (!empty($this->tags))
497 $this->newName = str_replace(
"[nt]",$this->tags, $this->newName);
output($string, $debug=0)
outputs string
pdfNamer::addToLog |
( |
|
$str | ) |
|
adds a string to the log message, which can later be written to the DB or output to STDOUT
- Parameters
-
string | $str | contains the message which shall be added to log |
- Returns
- none
Definition at line 93 of file ppyrd.namer.php.
94 $this->log .= $str .
"\n";
pdfNamer::cleanContent |
( |
| ) |
|
takes the PDF content and cleans it up
- Parameters
-
- Returns
- none
Definition at line 137 of file ppyrd.namer.php.
140 $this->content = implode(
" ", $this->content);
143 $this->content = strtolower($this->content);
146 $this->content = preg_replace($this->db->getConfigValue(
'stripCharactersFromContent'),
" ", $this->content);
149 $this->content = preg_replace(
"/\s\s+/",
" ", $this->content);
pdfNamer::closestDateToToday |
( |
|
$dates | ) |
|
takes an array of dates and returns the closest one on or before today (since paper documents have dates in the past, not in the future)
- Parameters
-
array | $dates | containing all dates in YYYYMMDD format |
- Returns
- string in YYYYMMDD format if a date matched, or the placeholder "ddatum" if no date could be matched
Definition at line 121 of file ppyrd.namer.php.
124 foreach ($dates as $date) {
125 if ($date<=date(
'Ymd'))
pdfNamer::getTextFromPdf |
( |
|
$pdf | ) |
|
function executes pdftotext to extract text from file
- Parameters
-
Definition at line 81 of file ppyrd.namer.php.
83 exec(
'pdftotext -layout "' . $pdf .
'" -', $this->content);
looks for dates in the content of the file using the configured regular expression
- Parameters
-
- Returns
- none
Definition at line 160 of file ppyrd.namer.php.
164 $this->
addToLog(
'LOOKING FOR DATES');
167 preg_match_all ($this->dateRegEx, $this->content, $dates);
170 $dates = array_unique($dates[0]);
173 array_walk($dates,
'self::toDate');
174 $dates = array_unique($dates);
180 $this->newName = str_replace(
"ddatum",$this->newDate, $this->newName);
closestDateToToday($dates)
takes an array of dates and returns the closest one before today (since Paper documents have dates in...
checks if there is a price in the text
Definition at line 303 of file ppyrd.namer.php.
306 preg_match_all($this->db->getConfigValue(
'matchPriceRegex'), $this->content, $results);
309 $prices = array_values($results[0]);
312 $prices = preg_replace(
"/[^0-9,.]/",
"", $prices);
314 foreach ($prices as $price) {
315 $price = floatval(str_replace(
',',
'.',str_replace(
'.',
'', $price)));
316 if ($price > $maxprice) $maxprice = $price;
320 $this->price=number_format($maxprice,2,
",",
".");
322 $this->newName = str_replace(
"bbetrag",
"EUR".$this->price, $this->newName);
324 $this->
output(
"amount: EUR" . $this->price);
output($string, $debug=0)
outputs string
pdfNamer::matchRecipients |
( |
| ) |
|
reads recipient list from database and tries to match in text
- Parameters
-
- Returns
- none
Definition at line 432 of file ppyrd.namer.php.
434 $results = $this->db->getActiveRecipients();
435 $recipients = array();
438 while ($row = $results->fetchArray()) {
439 $cfound = substr_count($this->content, strtolower($row[
'recipientName']));
440 $this->
output(
"look for " . $row[
'recipientName'] .
" found $cfound", 1);
441 @$recipients[$row[
'shortNameForFile']] += $cfound;
448 foreach ($recipients as $name => $score) {
449 if ($score == 0) unset($recipients[$name]);
454 $recipients = implode(
',',array_flip($recipients));
457 if (!empty($recipients))
458 $this->newName = str_replace(
"wwer",$recipients, $this->newName);
output($string, $debug=0)
outputs string
pdfNamer::matchSenders |
( |
| ) |
|
reads rulesets from database and executes accordingly
- Parameters
-
- Returns
- none
Definition at line 210 of file ppyrd.namer.php.
213 $results = $this->db->getActiveSenders();
215 $tmpMatchedCompanyTags= array();
218 while ($row = $results->fetchArray()) {
223 if (strpos($row[
'foundWords'],
",")===
false) {
228 if (substr_count($this->content, $searchTerms)>0) {
229 @$company[$row[
'fileCompany']] += $row[
'companyScore'];
231 $tmpMatchedCompanyTags[$row[
'fileCompany']][]=$row[
'tags'];
241 $split = explode(
',', strtolower($row[
'foundWords']));
245 foreach ($split as $value) {
252 $cfound = substr_count($this->content, $value);
264 @$company[$row[
'fileCompany']] += $row[
'companyScore'];
266 $tmpMatchedCompanyTags[$row[
'fileCompany']][]=$row[
'tags'];
269 $this->
addToLog(
'"' . $row[
'foundWords'] .
'" ' .
" found - " . $row[
'companyScore'] .
" points for company " . $row[
'fileCompany']);
283 if (isset($company[key($company)])) {
284 $companyMatchRating = $company[key($company)];
285 $this->companyName = key($company);
286 $this->matchedCompanyTags = $tmpMatchedCompanyTags[$this->companyName];
289 $this->
output(
"company: " . $this->companyName .
" scored " . $companyMatchRating);
291 if ($companyMatchRating >= $this->companyMatchRating) {
292 $this->newName = str_replace(
"ffirma",$this->companyName, $this->newName);
output($string, $debug=0)
outputs string
replacePersonalVariables($searchTerm)
replaces the personal variables within a string
pdfNamer::matchSubjects |
( |
| ) |
|
reads subject rulesets from the database and matches them against the content to determine the subject
Definition at line 332 of file ppyrd.namer.php.
334 $results = $this->db->getActiveSubjects();
337 $tmpMatchedSubjectTags = array();
340 while ($row = $results->fetchArray()) {
344 @$tmpFoundCompany = trim($row[
'foundCompany']);
345 if ($tmpFoundCompany== $this->companyName || empty($tmpFoundCompany)) {
351 if (strpos($row[
'foundWords'],
",")===
false) {
355 if (substr_count($this->content, $searchTerm)>0) {
356 @$subject[$row[
'fileSubject']] += $row[
'subjectScore'];
359 $tmpMatchedSubjectTags[$row[
'fileSubject']][]=$row[
'tags'];
368 $split = explode(
',', strtolower($row[
'foundWords']));
372 foreach ($split as $value) {
379 $cfound = substr_count($this->content, $value);
391 @$subject[$row[
'fileSubject']] += $row[
'subjectScore'];
393 $tmpMatchedSubjectTags[$row[
'fileSubject']][]=$row[
'tags'];
395 $this->
addToLog(
'"' . $row[
'foundWords'] .
'" ' .
" found - " . $row[
'subjectScore'] .
" points for subject " . $row[
'fileSubject']);
409 @$subjectMatchRating = $subject[key($subject)];
410 $this->subjectName = key($subject);
411 @$this->matchedSubjectTags = $tmpMatchedSubjectTags[$this->subjectName];
414 $this->
output(
"subject: " . $this->subjectName .
" scored " . $subjectMatchRating);
416 if ($subjectMatchRating >= $this->subjectMatchRating) {
417 $this->newName = str_replace(
"bbetreff",$this->subjectName, $this->newName);
output($string, $debug=0)
outputs string
replacePersonalVariables($searchTerm)
replaces the personal variables within a string
pdfNamer::replacePersonalVariables |
( |
|
$searchTerm | ) |
|
replaces the personal variables within a string
- Parameters
-
Definition at line 189 of file ppyrd.namer.php.
192 $results = $this->db->getPersonalVariables();
195 while ($row = $results->fetchArray()) {
196 $searchTerm= str_replace($row[
'variableName'], $row[
'replaceWith'], $searchTerm);
main function calling relevant process steps to identify document
- Parameters
-
- Returns
- none
Definition at line 508 of file ppyrd.namer.php.
535 if (!preg_match(
'(ddatum|ffirma|bbetreff|wwer|bbetrag)',$this->newName)) {
536 exec(
'mv --backup=numbered "' . $this->oldName .
'" "../outbox/' . $this->newName .
'"');
540 if ($this->oldName != $this->newName) {
541 exec(
'mv --backup=numbered "' . $this->oldName .
'" "' . $this->newName .
'"');
545 $this->
output(
"new name: " . $this->newName);
549 $this->db->writeLog($this->oldName, $this->newName, $this->content, $this->log);
addTags()
function adds tags once company and subject are correctly matched
matchSenders()
reads rulesets from database and executes accordingly
matchSubjects()
reads subject rulesets from the database and matches them against the content to determine the subject
matchRecipients()
reads recipient list from database and tries to match in text
cleanContent()
takes the PDF content and cleans it up
matchDates()
looks for dates in the content of the file using the configured regular expression
output($string, $debug=0)
outputs string
matchPrice()
checks if there is a price in the text
pdfNamer::toDate |
( |
& |
$item, |
|
|
|
$key |
|
) |
| |
converts a text string to a date in YYYYMMDD format. Used as the array_walk callback in matchDates.
- Parameters
-
reference | $item | reference to the array item, which is converted in place |
string | $key | array key name |
- Returns
- none
Definition at line 104 of file ppyrd.namer.php.
105 self::addtolog(
"Date found $item");
106 if (strpos($item,
"im") !==
false) {
107 $item =str_replace(
"im ",
"",$item);
108 $item = date(
"Ymt", strtotime($item));
110 $item = date(
"Ymd", strtotime($item));
The documentation for this class was generated from the following file: