From 87ffae7efce88abeb6c9defcca74c103807ba5f5 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Wed, 2 Apr 2014 18:17:35 -0500 Subject: [PATCH 01/33] Fixes tedivm/Fetch#43 by creating a name for nameless attached emails, allowing them to be seen as attachments rather than merged into the parent email. --- src/Fetch/Message.php | 84 +++++++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 19 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 3755869..800fe7b 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -21,6 +21,19 @@ class Message { /** + * Primary Body Types + * According to http://www.php.net/manual/en/function.imap-fetchstructure.php + */ + const TYPE_TEXT = 0; + const TYPE_MULTIPART = 1; + const TYPE_MESSAGE = 2; + const TYPE_APPLICATION = 3; + const TYPE_AUDIO = 4; + const TYPE_IMAGE = 5; + const TYPE_VIDEO = 6; + const TYPE_OTHER = 7; + + /** * This is the connection/mailbox class that the email came from. * * @var Server @@ -415,6 +428,37 @@ public function getImapBox() { return $this->imapConnection; } + + /** + * Adds an attachment + * + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * + * @return boolean Successful attachment of file + */ + protected function addAttachment($parameters, $structure, $partIdentifier) + { + // make up a filename if none is provided (like Gmail and desktop clients do) + if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { + error_log("is message"); + error_log(print_r($structure, true)); + $dpar = new \stdClass(); + $dpar->attribute = "filename"; + $dpar->value = "email.eml"; + $structure->dparameters[] = $dpar; + } + + try { + $attachment = new Attachment($this, $structure, $partIdentifier); + $this->attachments[] = $attachment; + return true; + } catch (Exteption $e) { + error_log("Unable to make attachment"); + return false; + } + } /** * This function takes in a structure and identifier and processes that part of the message. If that portion of the @@ -427,11 +471,14 @@ public function getImapBox() protected function processStructure($structure, $partIdentifier = null) { $parameters = self::getParametersFromStructure($structure); - - if (isset($parameters['name']) || isset($parameters['filename'])) { - $attachment = new Attachment($this, $structure, $partIdentifier); - $this->attachments[] = $attachment; - } elseif ($structure->type == 0 || $structure->type == 1) { + $attached = false; + + if ((isset($structure->disposition) && $structure->disposition == "attachment") && + !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { + $attached = self::addAttachment($parameters, $structure, $partIdentifier); + } + + if (!$attached && ($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { $messageBody = isset($partIdentifier) ? imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) : imap_body($this->imapStream, $this->uid, FT_UID); @@ -441,7 +488,7 @@ protected function processStructure($structure, $partIdentifier = null) if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); - if (strtolower($structure->subtype) === 'plain' || ($structure->type == 1 && strtolower($structure->subtype) !== 'alternative')) { + if (strtolower($structure->subtype) === 'plain' || ($structure->type == self::TYPE_MULTIPART && strtolower($structure->subtype) !== 'alternative')) { if (isset($this->plaintextMessage)) { $this->plaintextMessage .= PHP_EOL . PHP_EOL; } else { @@ -458,19 +505,18 @@ protected function processStructure($structure, $partIdentifier = null) $this->htmlMessage .= $messageBody; } - } - - if (isset($structure->parts)) { // multipart: iterate through each part - - foreach ($structure->parts as $partIndex => $part) { - $partId = $partIndex + 1; - - if (isset($partIdentifier)) - $partId = $partIdentifier . '.' . $partId; - - $this->processStructure($part, $partId); - } - } + + if (isset($structure->parts)) { // multipart: iterate through each part + foreach ($structure->parts as $partIndex => $part) { + $partId = $partIndex + 1; + + if (isset($partIdentifier)) + $partId = $partIdentifier . '.' . $partId; + + $this->processStructure($part, $partId); + } + } + } } /** From 39eba97639a68ba75e57bea030a44e6d06338acc Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 7 Apr 2014 11:34:28 -0500 Subject: [PATCH 02/33] Removed error_log statements I mistakenly left in while debugging; corrected spelling error --- src/Fetch/Message.php | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 800fe7b..4a76516 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -442,8 +442,6 @@ protected function addAttachment($parameters, $structure, $partIdentifier) { // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { - error_log("is message"); - error_log(print_r($structure, true)); $dpar = new \stdClass(); $dpar->attribute = "filename"; $dpar->value = "email.eml"; @@ -454,8 +452,7 @@ protected function addAttachment($parameters, $structure, $partIdentifier) $attachment = new Attachment($this, $structure, $partIdentifier); $this->attachments[] = $attachment; return true; - } catch (Exteption $e) { - error_log("Unable to make attachment"); + } catch (Exception $e) { return false; } } From 3ac1f49261e8b36fba0ab47939e5fe418dc121fa Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Thu, 17 Apr 2014 16:34:22 -0500 Subject: [PATCH 03/33] Abstracted messageBody processing from processStructure and enabled extraction of attached .eml file's subject line for use as the filename in addAttachment, with 'email.eml' as fallback. --- src/Fetch/Message.php | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 4a76516..ca27b9c 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -442,9 +442,13 @@ protected function addAttachment($parameters, $structure, $partIdentifier) { // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { + $matches = array(); + preg_match('/Subject:\s(.*)\n/', self::processBody($parameters, $structure, $partIdentifier), $matches); + $filename = !empty($matches[1]) ? $matches[1] : "email"; + $dpar = new \stdClass(); $dpar->attribute = "filename"; - $dpar->value = "email.eml"; + $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; $structure->dparameters[] = $dpar; } @@ -456,6 +460,20 @@ protected function addAttachment($parameters, $structure, $partIdentifier) return false; } } + + protected function processBody($parameters, $structure, $partIdentifier) { + $messageBody = isset($partIdentifier) ? + imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) + : imap_body($this->imapStream, $this->uid, FT_UID); + + $messageBody = self::decode($messageBody, $structure->encoding); + + if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { + $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); + } + + return $messageBody; + } /** * This function takes in a structure and identifier and processes that part of the message. If that portion of the @@ -476,14 +494,7 @@ protected function processStructure($structure, $partIdentifier = null) } if (!$attached && ($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { - $messageBody = isset($partIdentifier) ? - imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) - : imap_body($this->imapStream, $this->uid, FT_UID); - - $messageBody = self::decode($messageBody, $structure->encoding); - - if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) - $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); + $messageBody = self::processBody($parameters, $structure, $partIdentifier); if (strtolower($structure->subtype) === 'plain' || ($structure->type == self::TYPE_MULTIPART && strtolower($structure->subtype) !== 'alternative')) { if (isset($this->plaintextMessage)) { From 73c26e99cefb4aaaa4580cd402cd9c3d3a3e78ee Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Thu, 17 Apr 2014 16:47:11 -0500 Subject: [PATCH 04/33] Forgot to document the new method. --- src/Fetch/Message.php | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index ca27b9c..dd2fda3 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -460,7 +460,16 @@ protected function addAttachment($parameters, $structure, $partIdentifier) return false; } } - + /** + * This function extracts the body of an email part, decodes it, + * converts it to the charset of the parent message, and returns the result. + * + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * + * @return string + */ protected function processBody($parameters, $structure, $partIdentifier) { $messageBody = isset($partIdentifier) ? imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) From 8281c08e997e06633ab867456e00b0d7e30d918b Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 12 May 2014 15:00:57 -0500 Subject: [PATCH 05/33] Some more changes to support pulling the Subject line from a .eml and using it as the filename. --- src/Fetch/Message.php | 68 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index dd2fda3..f4b91b5 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -442,9 +442,9 @@ protected function addAttachment($parameters, $structure, $partIdentifier) { // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { - $matches = array(); - preg_match('/Subject:\s(.*)\n/', self::processBody($parameters, $structure, $partIdentifier), $matches); - $filename = !empty($matches[1]) ? $matches[1] : "email"; + $subjectMatches = array(); + preg_match('/Subject:\s?(.*)(?=Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); + $filename = !empty($subjectMatches[1]) ? self::processSubject($subjectMatches[1]) : "email"; $dpar = new \stdClass(); $dpar->attribute = "filename"; @@ -460,6 +460,64 @@ protected function addAttachment($parameters, $structure, $partIdentifier) return false; } } + + /** + * Decodes the email subject line string passed to it + * Designed to handle subject lines with special characters encoded in Base64 or Quoted-Printable + * + * @param string $subject subject line to be processed and/or decoded + * + * @return string decoded subject line + */ + protected function processSubject($subject) { + xdebug_break(); + $output = ""; + + $encodingMatches = array(); + preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?(.*)/', $subject, $encodingMatches); + + if (count($encodingMatches) > 3) { + array_shift($encodingMatches); // remove input + $charset = array_shift($encodingMatches); // remove charset + $encoding = array_shift($encodingMatches); + $encodedString = array_shift($encodingMatches); + $nextSection = array_shift($encodingMatches); + + switch ($encoding) { + case "Q": // Quoted-Printable + $decodedString = quoted_printable_decode($encodedString); + break; + case "B": // Base64 + $decodedString = base64_decode($encodedString); + break; + default: + $decodedString = ""; + } + + $output .= self::cleanFilename($charset, $decodedString); + + if (!empty($nextSection)) { + $output .= self::processSubject($nextSection); + } + + return $output; + } else if (count($encodingMatches) > 0) { + return $output . $encodingMatches[0]; + } else if (empty($encodingMatches)) { + return $subject; + } + + return $output; + } + + protected function cleanFilename($charset, $rawName) { + // Strip special chars from filename + $sName = preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $rawName); + // Transliterate accented chars to un-accented equivalents + $stName = iconv($charset, "iso-8859-1//TRANSLIT", $sName); + return $stName; + } + /** * This function extracts the body of an email part, decodes it, * converts it to the charset of the parent message, and returns the result. @@ -478,6 +536,7 @@ protected function processBody($parameters, $structure, $partIdentifier) { $messageBody = self::decode($messageBody, $structure->encoding); if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { +// TODO: ERROR HERE!!! $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); } @@ -490,13 +549,14 @@ protected function processBody($parameters, $structure, $partIdentifier) { * * @param \stdClass $structure * @param string $partIdentifier - * @todoa process attachments. */ protected function processStructure($structure, $partIdentifier = null) { $parameters = self::getParametersFromStructure($structure); $attached = false; + // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" + if ((isset($structure->disposition) && $structure->disposition == "attachment") && !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { $attached = self::addAttachment($parameters, $structure, $partIdentifier); From b225230908260cd331e14ed6a18fcf58597ef4b8 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 12 May 2014 18:09:54 -0500 Subject: [PATCH 06/33] Fixed issue with the Subject line parsing regex, which would cause it to succeed on malformed .eml files, but fail on correctly-formatted .eml files. --- src/Fetch/Message.php | 68 ++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index f4b91b5..3a6d94a 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -208,8 +208,7 @@ public function __construct($messageUniqueId, Server $connection) */ protected function loadMessage() { - - /* First load the message overview information */ + /* First load the message overview information */ if(!is_object($messageOverview = $this->getOverview())) return false; @@ -246,8 +245,24 @@ protected function loadMessage() $this->processStructure($structure); } else { // multipart - foreach ($structure->parts as $id => $part) + foreach ($structure->parts as $id => $part) { + if (!empty($part->description)) { + $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description); + $part->description = $cleanFilename; + foreach($part->parameters as $key => $parameter) { + if ($parameter->attribute === "name") { + $part->parameters[$key]->value = $cleanFilename; + } + } + foreach($part->dparameters as $key => $dparameter) { + if ($dparameter->attribute === "filename") { + $part->dparameters[$key]->value = $cleanFilename; + } + } + } + $this->processStructure($part, $id + 1); + } } return true; @@ -443,8 +458,8 @@ protected function addAttachment($parameters, $structure, $partIdentifier) // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { $subjectMatches = array(); - preg_match('/Subject:\s?(.*)(?=Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); - $filename = !empty($subjectMatches[1]) ? self::processSubject($subjectMatches[1]) : "email"; + preg_match('/Subject:\s?(.*?)(?=\s*Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); + $filename = !empty($subjectMatches[1]) ? self::processFilename($subjectMatches[1]) : "email"; $dpar = new \stdClass(); $dpar->attribute = "filename"; @@ -469,12 +484,11 @@ protected function addAttachment($parameters, $structure, $partIdentifier) * * @return string decoded subject line */ - protected function processSubject($subject) { - xdebug_break(); + protected function processFilename($subject) { $output = ""; $encodingMatches = array(); - preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?(.*)/', $subject, $encodingMatches); + preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?=\s*(.*)/', $subject, $encodingMatches); if (count($encodingMatches) > 3) { array_shift($encodingMatches); // remove input @@ -494,10 +508,12 @@ protected function processSubject($subject) { $decodedString = ""; } - $output .= self::cleanFilename($charset, $decodedString); + $decodedString = iconv($charset, "UTF-8//TRANSLIT", $decodedString); + + $output .= self::cleanFilename($decodedString); if (!empty($nextSection)) { - $output .= self::processSubject($nextSection); + $output .= self::processFilename($nextSection); } return $output; @@ -510,12 +526,8 @@ protected function processSubject($subject) { return $output; } - protected function cleanFilename($charset, $rawName) { - // Strip special chars from filename - $sName = preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $rawName); - // Transliterate accented chars to un-accented equivalents - $stName = iconv($charset, "iso-8859-1//TRANSLIT", $sName); - return $stName; + protected function cleanFilename($oldName) { + return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); } /** @@ -552,7 +564,7 @@ protected function processBody($parameters, $structure, $partIdentifier) { */ protected function processStructure($structure, $partIdentifier = null) { - $parameters = self::getParametersFromStructure($structure); + $parameters = self::getParametersFromStructure($structure); $attached = false; // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" @@ -582,7 +594,7 @@ protected function processStructure($structure, $partIdentifier = null) $this->htmlMessage .= $messageBody; } - + if (isset($structure->parts)) { // multipart: iterate through each part foreach ($structure->parts as $partIndex => $part) { $partId = $partIndex + 1; @@ -666,14 +678,18 @@ public static function typeIdToString($id) */ public static function getParametersFromStructure($structure) { - $parameters = array(); - if (isset($structure->parameters)) - foreach ($structure->parameters as $parameter) - $parameters[strtolower($parameter->attribute)] = $parameter->value; - - if (isset($structure->dparameters)) - foreach ($structure->dparameters as $parameter) - $parameters[strtolower($parameter->attribute)] = $parameter->value; + $parameters = array(); + if (isset($structure->parameters)) { + foreach ($structure->parameters as $parameter) { + $parameters[strtolower($parameter->attribute)] = $parameter->value; + } + } + + if (isset($structure->dparameters)) { + foreach ($structure->dparameters as $parameter) { + $parameters[strtolower($parameter->attribute)] = $parameter->value; + } + } return $parameters; } From 45741afe7518457c40f97335f58e65c28ed90f69 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 12 May 2014 18:16:18 -0500 Subject: [PATCH 07/33] syntax error --- src/Fetch/Message.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 3a6d94a..dc05b8b 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -247,7 +247,7 @@ protected function loadMessage() // multipart foreach ($structure->parts as $id => $part) { if (!empty($part->description)) { - $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description); + $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description)); $part->description = $cleanFilename; foreach($part->parameters as $key => $parameter) { if ($parameter->attribute === "name") { From 85241d7641b62ca4aaaf27dbb6908dc2d915c962 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 12 May 2014 18:27:51 -0500 Subject: [PATCH 08/33] Fixing bugs reported by scrutinizer ('The class Fetch\Exception does not exist' and 'It seems like can also be type ; however, does only seem to accept , maybe add an additional type check?'). --- src/Fetch/Message.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index dc05b8b..bd8cda5 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -471,7 +471,7 @@ protected function addAttachment($parameters, $structure, $partIdentifier) $attachment = new Attachment($this, $structure, $partIdentifier); $this->attachments[] = $attachment; return true; - } catch (Exception $e) { + } catch (\Exception $e) { return false; } } @@ -490,7 +490,7 @@ protected function processFilename($subject) { $encodingMatches = array(); preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?=\s*(.*)/', $subject, $encodingMatches); - if (count($encodingMatches) > 3) { + if (is_array($encodingMatches) && count($encodingMatches) > 3) { array_shift($encodingMatches); // remove input $charset = array_shift($encodingMatches); // remove charset $encoding = array_shift($encodingMatches); From 1a613d499d5808ce54a9f73b1caabc27bb6247ec Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 12 May 2014 19:18:42 -0500 Subject: [PATCH 09/33] Converted tabs to 4 spaces. Moved opening braces to the line after the function declaration. --- src/Fetch/Message.php | 339 +++++++++++++++++++++--------------------- 1 file changed, 171 insertions(+), 168 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index bd8cda5..210a751 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -21,19 +21,19 @@ class Message { /** - * Primary Body Types - * According to http://www.php.net/manual/en/function.imap-fetchstructure.php - */ - const TYPE_TEXT = 0; - const TYPE_MULTIPART = 1; - const TYPE_MESSAGE = 2; - const TYPE_APPLICATION = 3; - const TYPE_AUDIO = 4; - const TYPE_IMAGE = 5; - const TYPE_VIDEO = 6; - const TYPE_OTHER = 7; - - /** + * Primary Body Types + * According to http://www.php.net/manual/en/function.imap-fetchstructure.php + */ + const TYPE_TEXT = 0; + const TYPE_MULTIPART = 1; + const TYPE_MESSAGE = 2; + const TYPE_APPLICATION = 3; + const TYPE_AUDIO = 4; + const TYPE_IMAGE = 5; + const TYPE_VIDEO = 6; + const TYPE_OTHER = 7; + + /** * This is the connection/mailbox class that the email came from. * * @var Server @@ -208,7 +208,7 @@ public function __construct($messageUniqueId, Server $connection) */ protected function loadMessage() { - /* First load the message overview information */ + /* First load the message overview information */ if(!is_object($messageOverview = $this->getOverview())) return false; @@ -246,23 +246,23 @@ protected function loadMessage() } else { // multipart foreach ($structure->parts as $id => $part) { - if (!empty($part->description)) { - $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description)); - $part->description = $cleanFilename; - foreach($part->parameters as $key => $parameter) { - if ($parameter->attribute === "name") { - $part->parameters[$key]->value = $cleanFilename; - } - } - foreach($part->dparameters as $key => $dparameter) { - if ($dparameter->attribute === "filename") { - $part->dparameters[$key]->value = $cleanFilename; - } - } - } - + if (!empty($part->description)) { + $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description)); + $part->description = $cleanFilename; + foreach($part->parameters as $key => $parameter) { + if ($parameter->attribute === "name") { + $part->parameters[$key]->value = $cleanFilename; + } + } + foreach($part->dparameters as $key => $dparameter) { + if ($dparameter->attribute === "filename") { + $part->dparameters[$key]->value = $cleanFilename; + } + } + } + $this->processStructure($part, $id + 1); - } + } } return true; @@ -443,117 +443,120 @@ public function getImapBox() { return $this->imapConnection; } - - /** - * Adds an attachment - * - * @param array $parameters - * @param \stdClass $structure - * @param string $partIdentifier - * - * @return boolean Successful attachment of file - */ - protected function addAttachment($parameters, $structure, $partIdentifier) - { - // make up a filename if none is provided (like Gmail and desktop clients do) - if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { - $subjectMatches = array(); - preg_match('/Subject:\s?(.*?)(?=\s*Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); - $filename = !empty($subjectMatches[1]) ? self::processFilename($subjectMatches[1]) : "email"; - - $dpar = new \stdClass(); - $dpar->attribute = "filename"; - $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; - $structure->dparameters[] = $dpar; - } - - try { - $attachment = new Attachment($this, $structure, $partIdentifier); - $this->attachments[] = $attachment; - return true; - } catch (\Exception $e) { - return false; - } - } - - /** - * Decodes the email subject line string passed to it - * Designed to handle subject lines with special characters encoded in Base64 or Quoted-Printable - * - * @param string $subject subject line to be processed and/or decoded - * - * @return string decoded subject line - */ - protected function processFilename($subject) { - $output = ""; - - $encodingMatches = array(); - preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?=\s*(.*)/', $subject, $encodingMatches); - - if (is_array($encodingMatches) && count($encodingMatches) > 3) { - array_shift($encodingMatches); // remove input - $charset = array_shift($encodingMatches); // remove charset - $encoding = array_shift($encodingMatches); - $encodedString = array_shift($encodingMatches); - $nextSection = array_shift($encodingMatches); - - switch ($encoding) { - case "Q": // Quoted-Printable - $decodedString = quoted_printable_decode($encodedString); - break; - case "B": // Base64 - $decodedString = base64_decode($encodedString); - break; - default: - $decodedString = ""; - } - - $decodedString = iconv($charset, "UTF-8//TRANSLIT", $decodedString); - - $output .= self::cleanFilename($decodedString); - - if (!empty($nextSection)) { - $output .= self::processFilename($nextSection); - } - - return $output; - } else if (count($encodingMatches) > 0) { - return $output . $encodingMatches[0]; - } else if (empty($encodingMatches)) { - return $subject; - } - - return $output; - } - - protected function cleanFilename($oldName) { - return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); - } - - /** - * This function extracts the body of an email part, decodes it, - * converts it to the charset of the parent message, and returns the result. - * - * @param array $parameters - * @param \stdClass $structure - * @param string $partIdentifier - * - * @return string - */ - protected function processBody($parameters, $structure, $partIdentifier) { - $messageBody = isset($partIdentifier) ? - imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) - : imap_body($this->imapStream, $this->uid, FT_UID); - - $messageBody = self::decode($messageBody, $structure->encoding); - - if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { + + /** + * Adds an attachment + * + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * + * @return boolean Successful attachment of file + */ + protected function addAttachment($parameters, $structure, $partIdentifier) + { + // make up a filename if none is provided (like Gmail and desktop clients do) + if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { + $subjectMatches = array(); + preg_match('/Subject:\s?(.*?)(?=\s*Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); + $filename = !empty($subjectMatches[1]) ? self::processFilename($subjectMatches[1]) : "email"; + + $dpar = new \stdClass(); + $dpar->attribute = "filename"; + $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; + $structure->dparameters[] = $dpar; + } + + try { + $attachment = new Attachment($this, $structure, $partIdentifier); + $this->attachments[] = $attachment; + return true; + } catch (\Exception $e) { + return false; + } + } + + /** + * Decodes the email subject line string passed to it + * Designed to handle subject lines with special characters encoded in Base64 or Quoted-Printable + * + * @param string $subject subject line to be processed and/or decoded + * + * @return string decoded subject line + */ + protected function processFilename($subject) + { + $output = ""; + + $encodingMatches = array(); + preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?=\s*(.*)/', $subject, $encodingMatches); + + if (is_array($encodingMatches) && count($encodingMatches) > 3) { + array_shift($encodingMatches); // remove input + $charset = array_shift($encodingMatches); // remove charset + $encoding = array_shift($encodingMatches); + $encodedString = array_shift($encodingMatches); + $nextSection = array_shift($encodingMatches); + + switch ($encoding) { + case "Q": // Quoted-Printable + $decodedString = quoted_printable_decode($encodedString); + break; + case "B": // Base64 + $decodedString = base64_decode($encodedString); + break; + default: + $decodedString = ""; + } + + $decodedString = iconv($charset, "UTF-8//TRANSLIT", $decodedString); + + $output .= self::cleanFilename($decodedString); + + if (!empty($nextSection)) { + $output .= self::processFilename($nextSection); + } + + return $output; + } else if (count($encodingMatches) > 0) { + return $output . $encodingMatches[0]; + } else if (empty($encodingMatches)) { + return $subject; + } + + return $output; + } + + protected function cleanFilename($oldName) + { + return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); + } + + /** + * This function extracts the body of an email part, decodes it, + * converts it to the charset of the parent message, and returns the result. + * + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * + * @return string + */ + protected function processBody($parameters, $structure, $partIdentifier) + { + $messageBody = isset($partIdentifier) ? + imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) + : imap_body($this->imapStream, $this->uid, FT_UID); + + $messageBody = self::decode($messageBody, $structure->encoding); + + if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { // TODO: ERROR HERE!!! - $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); - } - - return $messageBody; - } + $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); + } + + return $messageBody; + } /** * This function takes in a structure and identifier and processes that part of the message. If that portion of the @@ -564,15 +567,15 @@ protected function processBody($parameters, $structure, $partIdentifier) { */ protected function processStructure($structure, $partIdentifier = null) { - $parameters = self::getParametersFromStructure($structure); - $attached = false; - - // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" - - if ((isset($structure->disposition) && $structure->disposition == "attachment") && - !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { - $attached = self::addAttachment($parameters, $structure, $partIdentifier); - } + $parameters = self::getParametersFromStructure($structure); + $attached = false; + + // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" + + if ((isset($structure->disposition) && $structure->disposition == "attachment") && + !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { + $attached = self::addAttachment($parameters, $structure, $partIdentifier); + } if (!$attached && ($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { $messageBody = self::processBody($parameters, $structure, $partIdentifier); @@ -594,18 +597,18 @@ protected function processStructure($structure, $partIdentifier = null) $this->htmlMessage .= $messageBody; } - - if (isset($structure->parts)) { // multipart: iterate through each part - foreach ($structure->parts as $partIndex => $part) { - $partId = $partIndex + 1; - - if (isset($partIdentifier)) - $partId = $partIdentifier . '.' . $partId; - - $this->processStructure($part, $partId); - } - } - } + + if (isset($structure->parts)) { // multipart: iterate through each part + foreach ($structure->parts as $partIndex => $part) { + $partId = $partIndex + 1; + + if (isset($partIdentifier)) + $partId = $partIdentifier . '.' . $partId; + + $this->processStructure($part, $partId); + } + } + } } /** @@ -678,18 +681,18 @@ public static function typeIdToString($id) */ public static function getParametersFromStructure($structure) { - $parameters = array(); + $parameters = array(); if (isset($structure->parameters)) { foreach ($structure->parameters as $parameter) { - $parameters[strtolower($parameter->attribute)] = $parameter->value; - } - } + $parameters[strtolower($parameter->attribute)] = $parameter->value; + } + } if (isset($structure->dparameters)) { foreach ($structure->dparameters as $parameter) { - $parameters[strtolower($parameter->attribute)] = $parameter->value; - } - } + $parameters[strtolower($parameter->attribute)] = $parameter->value; + } + } return $parameters; } From 881a3ff8b47f71adac2778a75e3ad2d97cf9feff Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Tue, 13 May 2014 10:45:39 -0500 Subject: [PATCH 10/33] Ran PHP-CS-Fixer. --- autoload.php | 4 +- src/Fetch/Attachment.php | 2 +- src/Fetch/Message.php | 65 +++++++++++++++-------------- src/Fetch/Server.php | 9 ++-- tests/Fetch/Test/AttachmentTest.php | 8 +--- tests/Fetch/Test/MessageTest.php | 8 +--- tests/Fetch/Test/ServerTest.php | 31 +++++++------- tests/bootstrap.php | 9 ++-- 8 files changed, 65 insertions(+), 71 deletions(-) diff --git a/autoload.php b/autoload.php index 7cbce55..97c6b00 100644 --- a/autoload.php +++ b/autoload.php @@ -9,7 +9,7 @@ * file that was distributed with this source code. */ -spl_autoload_register(function($class) { +spl_autoload_register(function ($class) { $base = '/src/'; if (strpos($class, 'Fetch\Test') === 0) { @@ -22,4 +22,4 @@ return true; } -}); \ No newline at end of file +}); diff --git a/src/Fetch/Attachment.php b/src/Fetch/Attachment.php index 3e6c8c3..96f7082 100644 --- a/src/Fetch/Attachment.php +++ b/src/Fetch/Attachment.php @@ -160,7 +160,7 @@ public function getSize() /** * This function returns the object that contains the structure of this attachment. - * + * * @return \stdClass */ public function getStructure() diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 210a751..4ab9c8c 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -32,7 +32,7 @@ class Message const TYPE_IMAGE = 5; const TYPE_VIDEO = 6; const TYPE_OTHER = 7; - + /** * This is the connection/mailbox class that the email came from. * @@ -182,7 +182,6 @@ class Message */ public static $charset = 'UTF-8//TRANSLIT'; - /** * This constructor takes in the uid for the message and the Imap class representing the mailbox the * message should be opened from. This constructor should generally not be called directly, but rather retrieved @@ -211,6 +210,7 @@ protected function loadMessage() /* First load the message overview information */ if(!is_object($messageOverview = $this->getOverview())) + return false; $this->subject = $messageOverview->subject; @@ -249,18 +249,18 @@ protected function loadMessage() if (!empty($part->description)) { $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description)); $part->description = $cleanFilename; - foreach($part->parameters as $key => $parameter) { + foreach ($part->parameters as $key => $parameter) { if ($parameter->attribute === "name") { $part->parameters[$key]->value = $cleanFilename; } } - foreach($part->dparameters as $key => $dparameter) { + foreach ($part->dparameters as $key => $dparameter) { if ($dparameter->attribute === "filename") { $part->dparameters[$key]->value = $cleanFilename; } } } - + $this->processStructure($part, $id + 1); } } @@ -443,14 +443,14 @@ public function getImapBox() { return $this->imapConnection; } - + /** * Adds an attachment - * - * @param array $parameters + * + * @param array $parameters * @param \stdClass $structure - * @param string $partIdentifier - * + * @param string $partIdentifier + * * @return boolean Successful attachment of file */ protected function addAttachment($parameters, $structure, $partIdentifier) @@ -460,28 +460,29 @@ protected function addAttachment($parameters, $structure, $partIdentifier) $subjectMatches = array(); preg_match('/Subject:\s?(.*?)(?=\s*Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); $filename = !empty($subjectMatches[1]) ? self::processFilename($subjectMatches[1]) : "email"; - + $dpar = new \stdClass(); $dpar->attribute = "filename"; $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; $structure->dparameters[] = $dpar; } - + try { $attachment = new Attachment($this, $structure, $partIdentifier); $this->attachments[] = $attachment; + return true; } catch (\Exception $e) { return false; } } - + /** * Decodes the email subject line string passed to it * Designed to handle subject lines with special characters encoded in Base64 or Quoted-Printable - * + * * @param string $subject subject line to be processed and/or decoded - * + * * @return string decoded subject line */ protected function processFilename($subject) @@ -508,9 +509,9 @@ protected function processFilename($subject) default: $decodedString = ""; } - + $decodedString = iconv($charset, "UTF-8//TRANSLIT", $decodedString); - + $output .= self::cleanFilename($decodedString); if (!empty($nextSection)) { @@ -518,28 +519,28 @@ protected function processFilename($subject) } return $output; - } else if (count($encodingMatches) > 0) { + } elseif (count($encodingMatches) > 0) { return $output . $encodingMatches[0]; - } else if (empty($encodingMatches)) { + } elseif (empty($encodingMatches)) { return $subject; } return $output; } - + protected function cleanFilename($oldName) { return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); } - + /** - * This function extracts the body of an email part, decodes it, + * This function extracts the body of an email part, decodes it, * converts it to the charset of the parent message, and returns the result. - * - * @param array $parameters + * + * @param array $parameters * @param \stdClass $structure - * @param string $partIdentifier - * + * @param string $partIdentifier + * * @return string */ protected function processBody($parameters, $structure, $partIdentifier) @@ -554,7 +555,7 @@ protected function processBody($parameters, $structure, $partIdentifier) // TODO: ERROR HERE!!! $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); } - + return $messageBody; } @@ -569,10 +570,10 @@ protected function processStructure($structure, $partIdentifier = null) { $parameters = self::getParametersFromStructure($structure); $attached = false; - + // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" - - if ((isset($structure->disposition) && $structure->disposition == "attachment") && + + if ((isset($structure->disposition) && $structure->disposition == "attachment") && !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { $attached = self::addAttachment($parameters, $structure, $partIdentifier); } @@ -597,7 +598,7 @@ protected function processStructure($structure, $partIdentifier = null) $this->htmlMessage .= $messageBody; } - + if (isset($structure->parts)) { // multipart: iterate through each part foreach ($structure->parts as $partIndex => $part) { $partId = $partIndex + 1; @@ -791,9 +792,11 @@ public function setFlag($flag, $enable = true) if ($enable === true) { $this->status[$flag] = true; + return imap_setflag_full($this->imapStream, $this->uid, $imapifiedFlag, ST_UID); } else { unset($this->status[$flag]); + return imap_clearflag_full($this->imapStream, $this->uid, $imapifiedFlag, ST_UID); } } diff --git a/src/Fetch/Server.php b/src/Fetch/Server.php index 34a7bf6..0ef2d04 100644 --- a/src/Fetch/Server.php +++ b/src/Fetch/Server.php @@ -150,12 +150,13 @@ public function setAuthentication($username, $password) /** * This function sets the mailbox to connect to. * - * @param string $mailbox + * @param string $mailbox * @return bool */ public function setMailBox($mailbox = '') { if(!$this->hasMailBox($mailbox)) + return false; $this->mailbox = $mailbox; @@ -374,20 +375,20 @@ public function getMessages($limit = null) /** * Returns the requested email or false if it is not found. * - * @param int $uid + * @param int $uid * @return Message|bool */ public function getMessageByUid($uid) { try { $message = new \Fetch\Message($uid, $this); + return $message; - }catch(\Exception $e){ + } catch (\Exception $e) { return false; } } - /** * This function removes all of the messages flagged for deletion from the mailbox. * diff --git a/tests/Fetch/Test/AttachmentTest.php b/tests/Fetch/Test/AttachmentTest.php index fda5f68..ffec66b 100644 --- a/tests/Fetch/Test/AttachmentTest.php +++ b/tests/Fetch/Test/AttachmentTest.php @@ -11,7 +11,6 @@ namespace Fetch\Test; - /** * @package Fetch * @author Robert Hafner @@ -27,6 +26,7 @@ public static function getAttachments($MessageId) $returnAttachments = array(); foreach($attachments as $attachment) $returnAttachments[$attachment->getFileName()] = $attachment; + return $returnAttachments; } @@ -100,20 +100,16 @@ public function testSaveToDirectory() $this->assertFileExists($filepath); $this->assertEquals(md5(file_get_contents($filepath)), md5($attachment_RCA->getData())); - $attachments = static::getAttachments('6'); $attachment_RCA = $attachments['RCA_Indian_Head_test_pattern.JPG.zip']; $this->assertFalse($attachment_RCA->saveToDirectory('/'), 'Returns false when attempting to save without filesystem permission.'); - - $attachments = static::getAttachments('6'); $attachment_RCA = $attachments['RCA_Indian_Head_test_pattern.JPG.zip']; $this->assertFalse($attachment_RCA->saveToDirectory($filepath), 'Returns false when attempting to save over a file.'); } - - static function tearDownAfterClass() + public static function tearDownAfterClass() { $tmpdir = rtrim(sys_get_temp_dir(), '/') . '/'; $filepath = $tmpdir . 'RCA_Indian_Head_test_pattern.JPG.zip'; diff --git a/tests/Fetch/Test/MessageTest.php b/tests/Fetch/Test/MessageTest.php index 0547414..de9ce25 100644 --- a/tests/Fetch/Test/MessageTest.php +++ b/tests/Fetch/Test/MessageTest.php @@ -12,7 +12,6 @@ namespace Fetch\Test; use Fetch\Message; - /** * @package Fetch * @author Robert Hafner @@ -22,6 +21,7 @@ class MessageTest extends \PHPUnit_Framework_TestCase public static function getMessage($id) { $server = ServerTest::getServer(); + return new \Fetch\Message($id, $server); } @@ -68,7 +68,6 @@ public function testGetMessageBody() $messageNonHTML = $message->getMessageBody(); $this->assertEquals($plaintextTest, md5($messageNonHTML), 'Message returns as plaintext.'); - $messageHTML = $message->getMessageBody(true); $this->assertEquals($convertedHtmlTest, md5($messageHTML), 'Message converts from plaintext to HTML when requested.'); @@ -130,14 +129,12 @@ public function testGetAttachments() $messageWithoutAttachments = static::getMessage('3'); $this->assertFalse($messageWithoutAttachments->getAttachments(), 'getAttachments returns false when no attachments present.'); - $messageWithAttachments = static::getMessage('6'); $attachments = $messageWithAttachments->getAttachments(); $this->assertCount(2, $attachments); foreach($attachments as $attachment) $this->assertInstanceOf('\Fetch\Attachment', $attachment, 'getAttachments returns Fetch\Attachment objects.'); - $attachment = $messageWithAttachments->getAttachments('Test_card.png.zip'); $this->assertInstanceOf('\Fetch\Attachment', $attachment, 'getAttachment returns specified Fetch\Attachment object.'); } @@ -160,7 +157,6 @@ public function testSetFlag() $this->assertTrue($message->setFlag('answered', false), 'setFlag returned true.'); $this->assertFalse($message->checkFlag('answered'), 'Message was successfully unanswered.'); - $message = static::getMessage('2'); $this->assertFalse($message->checkFlag('flagged'), 'Message is not flagged.'); @@ -190,7 +186,6 @@ public function testMoveToMailbox() $server->setMailBox('Sent'); $sentFolderNumStart = $server->numMessages(); - // Switch to "Flagged" folder in order to test that function properly returns to it $this->assertTrue($server->setMailBox('Flagged Email')); @@ -246,5 +241,4 @@ public function testGetParametersFromStructure() } - } diff --git a/tests/Fetch/Test/ServerTest.php b/tests/Fetch/Test/ServerTest.php index a4fb072..4d96247 100644 --- a/tests/Fetch/Test/ServerTest.php +++ b/tests/Fetch/Test/ServerTest.php @@ -21,18 +21,18 @@ class ServerTest extends \PHPUnit_Framework_TestCase { /** * @dataProvider flagsDataProvider - * @param string $expected server string with %host% placeholder - * @param integer $port to use (needed to test behavior on port 143 and 993 from constructor) - * @param array $flags to set/unset ($flag => $value) + * @param string $expected server string with %host% placeholder + * @param integer $port to use (needed to test behavior on port 143 and 993 from constructor) + * @param array $flags to set/unset ($flag => $value) */ public function testFlags($expected, $port, $flags) { $server = new Server(TESTING_SERVER_HOST, $port); - + foreach ($flags as $flag => $value) { $server->setFlag($flag, $value); } - + $this->assertEquals(str_replace('%host%', TESTING_SERVER_HOST, $expected), $server->getServerString()); } @@ -46,8 +46,9 @@ public function testFlagOverwrite() $server->setFlag('TestFlag', 'false'); $this->assertAttributeContains('TestFlag=false', 'flags', $server); } - - public function flagsDataProvider() { + + public function flagsDataProvider() + { return array( array('{%host%:143/novalidate-cert}', 143, array()), array('{%host%:143/validate-cert}', 143, array('validate-cert' => true)), @@ -67,9 +68,9 @@ public function flagsDataProvider() { /** * @dataProvider connectionDataProvider - * @param integer $port to use (needed to test behavior on port 143 and 993 from constructor) - * @param array $flags to set/unset ($flag => $value) - * @param string $message Assertion message + * @param integer $port to use (needed to test behavior on port 143 and 993 from constructor) + * @param array $flags to set/unset ($flag => $value) + * @param string $message Assertion message */ public function testConnection($port, $flags, $message) { @@ -84,7 +85,8 @@ public function testConnection($port, $flags, $message) $this->assertInternalType('resource', $imapSteam, $message); } - public function connectionDataProvider() { + public function connectionDataProvider() + { return array( array(143, array(), 'Connects with default settings.'), array(993, array('novalidate-cert' => true), 'Connects over SSL (self signed).'), @@ -104,7 +106,7 @@ public function testGetMessages() $messages = $server->getMessages(5); $this->assertCount(5, $messages, 'Five messages returned'); - foreach($messages as $message) { + foreach ($messages as $message) { $this->assertInstanceOf('\Fetch\Message', $message, 'Returned values are Messages'); } } @@ -160,7 +162,6 @@ public function testSetOptionsException() $server->setOptions('purple'); } - public function testSetOptions() { $server = Static::getServer(); @@ -168,7 +169,6 @@ public function testSetOptions() $this->assertAttributeEquals(5, 'options', $server); } - public function testExpunge() { $server = Static::getServer(); @@ -185,10 +185,11 @@ public function testExpunge() $this->assertFalse($server->getMessageByUid(12), 'Message successfully expunged'); } - static public function getServer() + public static function getServer() { $server = new Server(TESTING_SERVER_HOST, 143); $server->setAuthentication(TEST_USER, TEST_PASSWORD); + return $server; } } diff --git a/tests/bootstrap.php b/tests/bootstrap.php index 7597f3a..b36e915 100644 --- a/tests/bootstrap.php +++ b/tests/bootstrap.php @@ -17,11 +17,10 @@ date_default_timezone_set('UTC'); -if(getenv('TRAVIS')) -{ +if (getenv('TRAVIS')) { define('TESTING_ENVIRONMENT', 'TRAVIS'); define('TESTING_SERVER_HOST', '127.0.0.1'); -}else{ +} else { define('TESTING_ENVIRONMENT', 'VAGRANT'); define('TESTING_SERVER_HOST', '172.31.1.2'); echo 'Initializing Environment using Vagrant' . PHP_EOL; @@ -38,7 +37,7 @@ echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" . PHP_EOL . PHP_EOL; $filename = __DIR__ .'/../autoload.php'; require_once $filename; -}else{ +} else { $loader = require_once $filename; $loader->add('Fetch\\Test', __DIR__); -} \ No newline at end of file +} From 5a1150eeeb8d92ea18e42699e897c0ff7b89617f Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Tue, 13 May 2014 17:18:40 -0500 Subject: [PATCH 11/33] Subject-to-Filename parsing got confused by DKIM-Signature section of signed email. Changed regex to be more specific and resilient. --- src/Fetch/Message.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 4ab9c8c..24a134e 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -458,8 +458,8 @@ protected function addAttachment($parameters, $structure, $partIdentifier) // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { $subjectMatches = array(); - preg_match('/Subject:\s?(.*?)(?=\s*Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); - $filename = !empty($subjectMatches[1]) ? self::processFilename($subjectMatches[1]) : "email"; + preg_match('/^Subject:\s?([^\n]*)/m', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); + $filename = !empty($subjectMatches[1]) ? trim(self::processFilename($subjectMatches[1])) : "email"; $dpar = new \stdClass(); $dpar->attribute = "filename"; From e585f161c588cab5bd56249394e8854d1004c4ba Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Tue, 13 May 2014 17:30:26 -0500 Subject: [PATCH 12/33] My IDE lost its tabs --> spaces setting, and ended up putting tabs in again. Replaced all tabs with spaces. --- src/Fetch/Message.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 24a134e..8cb0d16 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -458,7 +458,7 @@ protected function addAttachment($parameters, $structure, $partIdentifier) // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { $subjectMatches = array(); - preg_match('/^Subject:\s?([^\n]*)/m', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); + preg_match('/^Subject:\s?([^\n]*)/m', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); $filename = !empty($subjectMatches[1]) ? trim(self::processFilename($subjectMatches[1])) : "email"; $dpar = new \stdClass(); From ece5dfdac9399e27ef963b15d1a95b427456123e Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Tue, 20 May 2014 16:45:01 -0500 Subject: [PATCH 13/33] Made some changes to the processing of email contents and encoded Subject lines. --- src/Fetch/Message.php | 157 ++++++++++++++++++++++++++---------------- 1 file changed, 99 insertions(+), 58 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 8cb0d16..9d56eae 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -247,7 +247,7 @@ protected function loadMessage() // multipart foreach ($structure->parts as $id => $part) { if (!empty($part->description)) { - $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description)); + $cleanFilename = $this->makeFilenameSafe($part->description); $part->description = $cleanFilename; foreach ($part->parameters as $key => $parameter) { if ($parameter->attribute === "name") { @@ -450,7 +450,6 @@ public function getImapBox() * @param array $parameters * @param \stdClass $structure * @param string $partIdentifier - * * @return boolean Successful attachment of file */ protected function addAttachment($parameters, $structure, $partIdentifier) @@ -459,7 +458,7 @@ protected function addAttachment($parameters, $structure, $partIdentifier) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { $subjectMatches = array(); preg_match('/^Subject:\s?([^\n]*)/m', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); - $filename = !empty($subjectMatches[1]) ? trim(self::processFilename($subjectMatches[1])) : "email"; + $filename = !empty($subjectMatches[1]) ? trim($subjectMatches[1]) : "email"; $dpar = new \stdClass(); $dpar->attribute = "filename"; @@ -478,26 +477,86 @@ protected function addAttachment($parameters, $structure, $partIdentifier) } /** - * Decodes the email subject line string passed to it - * Designed to handle subject lines with special characters encoded in Base64 or Quoted-Printable - * - * @param string $subject subject line to be processed and/or decoded + * This function extracts the body of an email part, strips harmful + * Outlook-specific strings from it, processes any encoded one-liners, + * decodes it, converts it to the charset of the parent message, and + * returns the result. * - * @return string decoded subject line + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * @return string */ - protected function processFilename($subject) + protected function processBody($parameters, $structure, $partIdentifier) { - $output = ""; + $messageBody = isset($partIdentifier) ? + imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) + : imap_body($this->imapStream, $this->uid, FT_UID); + + $messageBody = $this->stripOutlookSpecificStrings($messageBody); + + $messageBody = $this->processEncodedSubject($messageBody); + + $messageBody = self::decode($messageBody, $structure->encoding); - $encodingMatches = array(); - preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?=\s*(.*)/', $subject, $encodingMatches); + if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { + $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); + } - if (is_array($encodingMatches) && count($encodingMatches) > 3) { - array_shift($encodingMatches); // remove input - $charset = array_shift($encodingMatches); // remove charset - $encoding = array_shift($encodingMatches); - $encodedString = array_shift($encodingMatches); - $nextSection = array_shift($encodingMatches); + return $messageBody; + } + + /** + * Removes "Thread-Topic:" and "Thread-Index:" lines from the message body + * which are placed there by Outlook and mess up the other processing steps + * + * @param string $messageBody + * @return string + */ + protected function stripOutlookSpecificStrings($messageBody) + { + $messageBody = preg_replace('/Thread-Topic:.*$/m', "", $messageBody); + $messageBody = preg_replace('/Thread-Index:.*$/m', "", $messageBody); + + return $messageBody; + } + + /** + * Grabs the encoded strings (usually subject line) from the string passed + * to it, and passes them to decodeSubject() for processing, then replaces + * them in the original string, before returning the modified string + * + * @param string $haystack + * @return string + */ + protected function processEncodedSubject($haystack) + { + $haystack = preg_replace_callback('/=\?([^?]*)\?([^?])\?([^?]*)\?=(.*)$/m', function($encodedStrings) + { + return $this->decodeSubject($encodedStrings); + }, $haystack); + + return $haystack; + } + + /** + * Decodes the email subject line array passed to it. It is designed + * to handle subject lines with special characters encoded in Base64 or + * Quoted-Printable in "=?charset?encoding?content?=" format + * + * @param array $encodedStrings + * @return string + */ + protected function decodeSubject($encodedStrings) + { + $output = ""; + + if (is_array($encodedStrings) && count($encodedStrings) > 3) { + $subject = array_shift($encodedStrings); // remove input + $charset = array_shift($encodedStrings); // remove charset + $encoding = array_shift($encodedStrings); + $encodedString = array_shift($encodedStrings); + $nextSection = array_shift($encodedStrings); switch ($encoding) { case "Q": // Quoted-Printable @@ -510,55 +569,39 @@ protected function processFilename($subject) $decodedString = ""; } - $decodedString = iconv($charset, "UTF-8//TRANSLIT", $decodedString); + $decodedString = iconv($charset, self::$charset, $decodedString); - $output .= self::cleanFilename($decodedString); + $output .= $this->makeFilenameSafe($decodedString); - if (!empty($nextSection)) { - $output .= self::processFilename($nextSection); + $test = preg_replace('/\s*/', "", $nextSection); + $test = trim($test); + if ($test != "") { + $output .= $this->processEncodedSubject($nextSection); } return $output; - } elseif (count($encodingMatches) > 0) { - return $output . $encodingMatches[0]; - } elseif (empty($encodingMatches)) { + } elseif (count($encodedStrings) > 0) { + return $output . $encodedStrings[0]; + } elseif (empty($encodedStrings)) { return $subject; } return $output; } - - protected function cleanFilename($oldName) - { - return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); - } - + /** - * This function extracts the body of an email part, decodes it, - * converts it to the charset of the parent message, and returns the result. - * - * @param array $parameters - * @param \stdClass $structure - * @param string $partIdentifier - * + * This function takes in a string to be used as a filename and replaces + * any dangerous characters with underscores to ensure compatibility with + * various file systems + * + * @param string $oldName * @return string */ - protected function processBody($parameters, $structure, $partIdentifier) + protected function makeFilenameSafe($oldName) { - $messageBody = isset($partIdentifier) ? - imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) - : imap_body($this->imapStream, $this->uid, FT_UID); - - $messageBody = self::decode($messageBody, $structure->encoding); - - if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { -// TODO: ERROR HERE!!! - $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); - } - - return $messageBody; + return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); } - + /** * This function takes in a structure and identifier and processes that part of the message. If that portion of the * message has its own subparts, those are recursively processed using this function. @@ -570,16 +613,14 @@ protected function processStructure($structure, $partIdentifier = null) { $parameters = self::getParametersFromStructure($structure); $attached = false; - - // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" - + if ((isset($structure->disposition) && $structure->disposition == "attachment") && !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { - $attached = self::addAttachment($parameters, $structure, $partIdentifier); + $attached = $this->addAttachment($parameters, $structure, $partIdentifier); } if (!$attached && ($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { - $messageBody = self::processBody($parameters, $structure, $partIdentifier); + $messageBody = $this->processBody($parameters, $structure, $partIdentifier); if (strtolower($structure->subtype) === 'plain' || ($structure->type == self::TYPE_MULTIPART && strtolower($structure->subtype) !== 'alternative')) { if (isset($this->plaintextMessage)) { @@ -637,7 +678,7 @@ public static function decode($data, $encoding) return $data; } } - + /** * This function returns the body type that an imap integer maps to. * From 707842773c0e7f6ec4001defce85b6540db38d7a Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Fri, 23 May 2014 17:20:04 -0500 Subject: [PATCH 14/33] Simplified encoded Subject-line processing into filename. Skip processing on message/rfc822 attachments in order to avoid mangling the file. --- src/Fetch/Attachment.php | 18 +++-- src/Fetch/Message.php | 143 ++++++++++++++------------------------- 2 files changed, 62 insertions(+), 99 deletions(-) diff --git a/src/Fetch/Attachment.php b/src/Fetch/Attachment.php index 96f7082..c7d13aa 100644 --- a/src/Fetch/Attachment.php +++ b/src/Fetch/Attachment.php @@ -109,20 +109,28 @@ public function __construct(Message $message, $structure, $partIdentifier = null } /** - * This function returns the data of the attachment. Combined with getMimeType() it can be used to directly output - * data to a browser. + * This function returns the data of the attachment. Combined with + * getMimeType() it can be used to directly output data to a browser. + * + * If the attachment file is message/rfc822, skip processing/decoding the + * contents in order to avoid mangling the file. Otherwise, decode as + * normal to ensure other files are handled correctly. * * @return string */ public function getData() { if (!isset($this->data)) { - $messageBody = isset($this->partId) ? + $rawBody = isset($this->partId) ? imap_fetchbody($this->imapStream, $this->messageId, $this->partId, FT_UID) : imap_body($this->imapStream, $this->messageId, FT_UID); - $messageBody = Message::decode($messageBody, $this->encoding); - $this->data = $messageBody; + if (strpos(strtolower($this->mimeType), "rfc822") !== false) { + $this->data = $rawBody; + } else { + $decodedBody = Message::decode($rawBody, $this->encoding); + $this->data = $decodedBody; + } } return $this->data; diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 9d56eae..5e9f584 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -446,6 +446,11 @@ public function getImapBox() /** * Adds an attachment + * + * If a filename is not provided and the attachment is a message/rfc822 + * email, parse the Subject line and use it as the filename. If the Subject + * line is blank or illegible, use a default filename (like Gmail and some + * desktop clients do) * * @param array $parameters * @param \stdClass $structure @@ -454,12 +459,14 @@ public function getImapBox() */ protected function addAttachment($parameters, $structure, $partIdentifier) { - // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { - $subjectMatches = array(); - preg_match('/^Subject:\s?([^\n]*)/m', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); - $filename = !empty($subjectMatches[1]) ? trim($subjectMatches[1]) : "email"; - + $body = isset($partIdentifier) ? + imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) + : imap_body($this->imapStream, $this->uid, FT_UID); + + $headers = iconv_mime_decode_headers($body, 0, self::$charset); + $filename = !empty($headers["Subject"]) ? $this->makeFilenameSafe($headers["Subject"]) : "email"; + $dpar = new \stdClass(); $dpar->attribute = "filename"; $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; @@ -487,106 +494,53 @@ protected function addAttachment($parameters, $structure, $partIdentifier) * @param string $partIdentifier * @return string */ - protected function processBody($parameters, $structure, $partIdentifier) + protected function processBody($structure, $partIdentifier) { - $messageBody = isset($partIdentifier) ? - imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) - : imap_body($this->imapStream, $this->uid, FT_UID); + $rawBody = isset($partIdentifier) ? + imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) + : imap_body($this->imapStream, $this->uid, FT_UID); - $messageBody = $this->stripOutlookSpecificStrings($messageBody); + $bodyNoOutlook = $this->stripOutlookSpecificStrings($rawBody); - $messageBody = $this->processEncodedSubject($messageBody); + $decodedBody = self::decode($bodyNoOutlook, $structure->encoding); - $messageBody = self::decode($messageBody, $structure->encoding); - - if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { - $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); + $inCharset = $inCharset = mb_detect_encoding($decodedBody, array( + "US-ASCII", + "ISO-8859-1", + "UTF-8", + "UTF-7", + "ASCII", + "EUC-JP", + "SJIS", + "eucJP-win", + "SJIS-win", + "JIS", + "ISO-2022-JP", + "UTF-16", + "UTF-32", + "UCS2", + "UCS4") + ); + + if ($inCharset && $inCharset !== self::$charset) { + $decodedBody = iconv($inCharset, self::$charset, $decodedBody); } - return $messageBody; + return $decodedBody; } /** - * Removes "Thread-Topic:" and "Thread-Index:" lines from the message body - * which are placed there by Outlook and mess up the other processing steps + * Removes "Thread-Index:" line from the message body which is placed there + * by Outlook and messes up the other processing steps. * * @param string $messageBody * @return string */ - protected function stripOutlookSpecificStrings($messageBody) - { - $messageBody = preg_replace('/Thread-Topic:.*$/m', "", $messageBody); - $messageBody = preg_replace('/Thread-Index:.*$/m', "", $messageBody); - - return $messageBody; - } - - /** - * Grabs the encoded strings (usually subject line) from the string passed - * to it, and passes them to decodeSubject() for processing, then replaces - * them in the original string, before returning the modified string - * - * @param string $haystack - * @return string - */ - protected function processEncodedSubject($haystack) - { - $haystack = preg_replace_callback('/=\?([^?]*)\?([^?])\?([^?]*)\?=(.*)$/m', function($encodedStrings) - { - return $this->decodeSubject($encodedStrings); - }, $haystack); - - return $haystack; - } - - /** - * Decodes the email subject line array passed to it. It is designed - * to handle subject lines with special characters encoded in Base64 or - * Quoted-Printable in "=?charset?encoding?content?=" format - * - * @param array $encodedStrings - * @return string - */ - protected function decodeSubject($encodedStrings) + protected function stripOutlookSpecificStrings($bodyBefore) { - $output = ""; + $bodyAfter = preg_replace('/Thread-Index:.*$/m', "", $bodyBefore); - if (is_array($encodedStrings) && count($encodedStrings) > 3) { - $subject = array_shift($encodedStrings); // remove input - $charset = array_shift($encodedStrings); // remove charset - $encoding = array_shift($encodedStrings); - $encodedString = array_shift($encodedStrings); - $nextSection = array_shift($encodedStrings); - - switch ($encoding) { - case "Q": // Quoted-Printable - $decodedString = quoted_printable_decode($encodedString); - break; - case "B": // Base64 - $decodedString = base64_decode($encodedString); - break; - default: - $decodedString = ""; - } - - $decodedString = iconv($charset, self::$charset, $decodedString); - - $output .= $this->makeFilenameSafe($decodedString); - - $test = preg_replace('/\s*/', "", $nextSection); - $test = trim($test); - if ($test != "") { - $output .= $this->processEncodedSubject($nextSection); - } - - return $output; - } elseif (count($encodedStrings) > 0) { - return $output . $encodedStrings[0]; - } elseif (empty($encodedStrings)) { - return $subject; - } - - return $output; + return $bodyAfter; } /** @@ -599,7 +553,7 @@ protected function decodeSubject($encodedStrings) */ protected function makeFilenameSafe($oldName) { - return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); + return preg_replace('/[<>"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); } /** @@ -611,16 +565,17 @@ protected function makeFilenameSafe($oldName) */ protected function processStructure($structure, $partIdentifier = null) { - $parameters = self::getParametersFromStructure($structure); $attached = false; + // TODO: Get HTML attachments working, too! if ((isset($structure->disposition) && $structure->disposition == "attachment") && !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { + $parameters = self::getParametersFromStructure($structure); $attached = $this->addAttachment($parameters, $structure, $partIdentifier); } if (!$attached && ($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { - $messageBody = $this->processBody($parameters, $structure, $partIdentifier); + $messageBody = $this->processBody($structure, $partIdentifier); if (strtolower($structure->subtype) === 'plain' || ($structure->type == self::TYPE_MULTIPART && strtolower($structure->subtype) !== 'alternative')) { if (isset($this->plaintextMessage)) { @@ -664,7 +619,7 @@ public static function decode($data, $encoding) { if (!is_numeric($encoding)) $encoding = strtolower($encoding); - + switch ($encoding) { case 'quoted-printable': case 4: From a01932da15e86138766e3d520ec20f6a34efa879 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Tue, 27 May 2014 12:19:16 -0500 Subject: [PATCH 15/33] The code was still causing attached HTML files to be inlined. Now anything with a disposition of 'attachment' will be added to the array rather than inlined. --- src/Fetch/Message.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 5e9f584..cf2609e 100644 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -568,8 +568,7 @@ protected function processStructure($structure, $partIdentifier = null) $attached = false; // TODO: Get HTML attachments working, too! - if ((isset($structure->disposition) && $structure->disposition == "attachment") && - !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { + if (isset($structure->disposition) && $structure->disposition == "attachment") { $parameters = self::getParametersFromStructure($structure); $attached = $this->addAttachment($parameters, $structure, $partIdentifier); } From 5ee7fc86aa78732e303d9642fe5a4c797499775a Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Wed, 2 Apr 2014 18:17:35 -0500 Subject: [PATCH 16/33] Fixes tedivm/Fetch#43 by creating a name for nameless attached emails, allowing them to be seen as attachments rather than merged into the parent email. --- src/Fetch/Message.php | 84 +++++++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 19 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 6a0b8c6..a5f9196 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -21,6 +21,19 @@ class Message { /** + * Primary Body Types + * According to http://www.php.net/manual/en/function.imap-fetchstructure.php + */ + const TYPE_TEXT = 0; + const TYPE_MULTIPART = 1; + const TYPE_MESSAGE = 2; + const TYPE_APPLICATION = 3; + const TYPE_AUDIO = 4; + const TYPE_IMAGE = 5; + const TYPE_VIDEO = 6; + const TYPE_OTHER = 7; + + /** * This is the connection/mailbox class that the email came from. * * @var Server @@ -432,6 +445,37 @@ public function getImapBox() { return $this->imapConnection; } + + /** + * Adds an attachment + * + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * + * @return boolean Successful attachment of file + */ + protected function addAttachment($parameters, $structure, $partIdentifier) + { + // make up a filename if none is provided (like Gmail and desktop clients do) + if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { + error_log("is message"); + error_log(print_r($structure, true)); + $dpar = new \stdClass(); + $dpar->attribute = "filename"; + $dpar->value = "email.eml"; + $structure->dparameters[] = $dpar; + } + + try { + $attachment = new Attachment($this, $structure, $partIdentifier); + $this->attachments[] = $attachment; + return true; + } catch (Exteption $e) { + error_log("Unable to make attachment"); + return false; + } + } /** * This function takes in a structure and identifier and processes that part of the message. If that portion of the @@ -443,11 +487,14 @@ public function getImapBox() protected function processStructure($structure, $partIdentifier = null) { $parameters = self::getParametersFromStructure($structure); - - if (isset($parameters['name']) || isset($parameters['filename'])) { - $attachment = new Attachment($this, $structure, $partIdentifier); - $this->attachments[] = $attachment; - } elseif ($structure->type == 0 || $structure->type == 1) { + $attached = false; + + if ((isset($structure->disposition) && $structure->disposition == "attachment") && + !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { + $attached = self::addAttachment($parameters, $structure, $partIdentifier); + } + + if (!$attached && ($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { $messageBody = isset($partIdentifier) ? imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) : imap_body($this->imapStream, $this->uid, FT_UID); @@ -462,7 +509,7 @@ protected function processStructure($structure, $partIdentifier = null) } } - if (strtolower($structure->subtype) === 'plain' || ($structure->type == 1 && strtolower($structure->subtype) !== 'alternative')) { + if (strtolower($structure->subtype) === 'plain' || ($structure->type == self::TYPE_MULTIPART && strtolower($structure->subtype) !== 'alternative')) { if (isset($this->plaintextMessage)) { $this->plaintextMessage .= PHP_EOL . PHP_EOL; } else { @@ -479,19 +526,18 @@ protected function processStructure($structure, $partIdentifier = null) $this->htmlMessage .= $messageBody; } - } - - if (isset($structure->parts)) { // multipart: iterate through each part - - foreach ($structure->parts as $partIndex => $part) { - $partId = $partIndex + 1; - - if (isset($partIdentifier)) - $partId = $partIdentifier . '.' . $partId; - - $this->processStructure($part, $partId); - } - } + + if (isset($structure->parts)) { // multipart: iterate through each part + foreach ($structure->parts as $partIndex => $part) { + $partId = $partIndex + 1; + + if (isset($partIdentifier)) + $partId = $partIdentifier . '.' . $partId; + + $this->processStructure($part, $partId); + } + } + } } /** From c9c5435b0b07b037ebf2178a838763be2b506c45 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 7 Apr 2014 11:34:28 -0500 Subject: [PATCH 17/33] Removed error_log statements I mistakenly left in while debugging; corrected spelling error --- src/Fetch/Message.php | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index a5f9196..0de04c4 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -459,8 +459,6 @@ protected function addAttachment($parameters, $structure, $partIdentifier) { // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { - error_log("is message"); - error_log(print_r($structure, true)); $dpar = new \stdClass(); $dpar->attribute = "filename"; $dpar->value = "email.eml"; @@ -471,8 +469,7 @@ protected function addAttachment($parameters, $structure, $partIdentifier) $attachment = new Attachment($this, $structure, $partIdentifier); $this->attachments[] = $attachment; return true; - } catch (Exteption $e) { - error_log("Unable to make attachment"); + } catch (Exception $e) { return false; } } From d2fd08e70650b29072208301e33534965ad533f2 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Thu, 17 Apr 2014 16:34:22 -0500 Subject: [PATCH 18/33] Abstracted messageBody processing from processStructure and enabled extraction of attached .eml file's subject line for use as the filename in addAttachment, with 'email.eml' as fallback. --- src/Fetch/Message.php | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 0de04c4..1cf1fba 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -459,9 +459,13 @@ protected function addAttachment($parameters, $structure, $partIdentifier) { // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { + $matches = array(); + preg_match('/Subject:\s(.*)\n/', self::processBody($parameters, $structure, $partIdentifier), $matches); + $filename = !empty($matches[1]) ? $matches[1] : "email"; + $dpar = new \stdClass(); $dpar->attribute = "filename"; - $dpar->value = "email.eml"; + $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; $structure->dparameters[] = $dpar; } @@ -473,6 +477,20 @@ protected function addAttachment($parameters, $structure, $partIdentifier) return false; } } + + protected function processBody($parameters, $structure, $partIdentifier) { + $messageBody = isset($partIdentifier) ? + imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) + : imap_body($this->imapStream, $this->uid, FT_UID); + + $messageBody = self::decode($messageBody, $structure->encoding); + + if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { + $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); + } + + return $messageBody; + } /** * This function takes in a structure and identifier and processes that part of the message. If that portion of the @@ -492,19 +510,7 @@ protected function processStructure($structure, $partIdentifier = null) } if (!$attached && ($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { - $messageBody = isset($partIdentifier) ? - imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) - : imap_body($this->imapStream, $this->uid, FT_UID); - - $messageBody = self::decode($messageBody, $structure->encoding); - - if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { - if (function_exists('mb_convert_encoding')) { - $messageBody = mb_convert_encoding($messageBody, self::$charset, $parameters['charset']); - } else { - $messageBody = iconv($parameters['charset'], self::$charset . self::$charsetFlag, $messageBody); - } - } + $messageBody = self::processBody($parameters, $structure, $partIdentifier); if (strtolower($structure->subtype) === 'plain' || ($structure->type == self::TYPE_MULTIPART && strtolower($structure->subtype) !== 'alternative')) { if (isset($this->plaintextMessage)) { From 14ace3823fe164376e168c9c5fe199c2219a8f72 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Thu, 17 Apr 2014 16:47:11 -0500 Subject: [PATCH 19/33] Forgot to document the new method. --- src/Fetch/Message.php | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 1cf1fba..7dcbcb0 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -477,7 +477,16 @@ protected function addAttachment($parameters, $structure, $partIdentifier) return false; } } - + /** + * This function extracts the body of an email part, decodes it, + * converts it to the charset of the parent message, and returns the result. + * + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * + * @return string + */ protected function processBody($parameters, $structure, $partIdentifier) { $messageBody = isset($partIdentifier) ? imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) From 827dea151a9134b3659fdc993a2043451bf8e641 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 12 May 2014 15:00:57 -0500 Subject: [PATCH 20/33] Some more changes to support pulling the Subject line from a .eml and using it as the filename. --- src/Fetch/Message.php | 67 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 7dcbcb0..0091050 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -459,9 +459,9 @@ protected function addAttachment($parameters, $structure, $partIdentifier) { // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { - $matches = array(); - preg_match('/Subject:\s(.*)\n/', self::processBody($parameters, $structure, $partIdentifier), $matches); - $filename = !empty($matches[1]) ? $matches[1] : "email"; + $subjectMatches = array(); + preg_match('/Subject:\s?(.*)(?=Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); + $filename = !empty($subjectMatches[1]) ? self::processSubject($subjectMatches[1]) : "email"; $dpar = new \stdClass(); $dpar->attribute = "filename"; @@ -477,6 +477,64 @@ protected function addAttachment($parameters, $structure, $partIdentifier) return false; } } + + /** + * Decodes the email subject line string passed to it + * Designed to handle subject lines with special characters encoded in Base64 or Quoted-Printable + * + * @param string $subject subject line to be processed and/or decoded + * + * @return string decoded subject line + */ + protected function processSubject($subject) { + xdebug_break(); + $output = ""; + + $encodingMatches = array(); + preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?(.*)/', $subject, $encodingMatches); + + if (count($encodingMatches) > 3) { + array_shift($encodingMatches); // remove input + $charset = array_shift($encodingMatches); // remove charset + $encoding = array_shift($encodingMatches); + $encodedString = array_shift($encodingMatches); + $nextSection = array_shift($encodingMatches); + + switch ($encoding) { + case "Q": // Quoted-Printable + $decodedString = quoted_printable_decode($encodedString); + break; + case "B": // Base64 + $decodedString = base64_decode($encodedString); + break; + default: + $decodedString = ""; + } + + $output .= self::cleanFilename($charset, $decodedString); + + if (!empty($nextSection)) { + $output .= self::processSubject($nextSection); + } + + return $output; + } else if (count($encodingMatches) > 0) { + return $output . $encodingMatches[0]; + } else if (empty($encodingMatches)) { + return $subject; + } + + return $output; + } + + protected function cleanFilename($charset, $rawName) { + // Strip special chars from filename + $sName = preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $rawName); + // Transliterate accented chars to un-accented equivalents + $stName = iconv($charset, "iso-8859-1//TRANSLIT", $sName); + return $stName; + } + /** * This function extracts the body of an email part, decodes it, * converts it to the charset of the parent message, and returns the result. @@ -495,6 +553,7 @@ protected function processBody($parameters, $structure, $partIdentifier) { $messageBody = self::decode($messageBody, $structure->encoding); if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { +// TODO: ERROR HERE!!! $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); } @@ -513,6 +572,8 @@ protected function processStructure($structure, $partIdentifier = null) $parameters = self::getParametersFromStructure($structure); $attached = false; + // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" + if ((isset($structure->disposition) && $structure->disposition == "attachment") && !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { $attached = self::addAttachment($parameters, $structure, $partIdentifier); From 7572fa411d4128ec80166a679af45e5f49fb65d1 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 12 May 2014 18:09:54 -0500 Subject: [PATCH 21/33] Fixed issue with the Subject line parsing regex, which would cause it to succeed on malformed .eml files, but fail on correctly-formatted .eml files. --- src/Fetch/Message.php | 68 ++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 0091050..7991838 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -225,8 +225,7 @@ public function __construct($messageUniqueId, Server $connection) */ protected function loadMessage() { - - /* First load the message overview information */ + /* First load the message overview information */ if(!is_object($messageOverview = $this->getOverview())) @@ -264,8 +263,24 @@ protected function loadMessage() $this->processStructure($structure); } else { // multipart - foreach ($structure->parts as $id => $part) + foreach ($structure->parts as $id => $part) { + if (!empty($part->description)) { + $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description); + $part->description = $cleanFilename; + foreach($part->parameters as $key => $parameter) { + if ($parameter->attribute === "name") { + $part->parameters[$key]->value = $cleanFilename; + } + } + foreach($part->dparameters as $key => $dparameter) { + if ($dparameter->attribute === "filename") { + $part->dparameters[$key]->value = $cleanFilename; + } + } + } + $this->processStructure($part, $id + 1); + } } return true; @@ -460,8 +475,8 @@ protected function addAttachment($parameters, $structure, $partIdentifier) // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { $subjectMatches = array(); - preg_match('/Subject:\s?(.*)(?=Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); - $filename = !empty($subjectMatches[1]) ? self::processSubject($subjectMatches[1]) : "email"; + preg_match('/Subject:\s?(.*?)(?=\s*Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); + $filename = !empty($subjectMatches[1]) ? self::processFilename($subjectMatches[1]) : "email"; $dpar = new \stdClass(); $dpar->attribute = "filename"; @@ -486,12 +501,11 @@ protected function addAttachment($parameters, $structure, $partIdentifier) * * @return string decoded subject line */ - protected function processSubject($subject) { - xdebug_break(); + protected function processFilename($subject) { $output = ""; $encodingMatches = array(); - preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?(.*)/', $subject, $encodingMatches); + preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?=\s*(.*)/', $subject, $encodingMatches); if (count($encodingMatches) > 3) { array_shift($encodingMatches); // remove input @@ -511,10 +525,12 @@ protected function processSubject($subject) { $decodedString = ""; } - $output .= self::cleanFilename($charset, $decodedString); + $decodedString = iconv($charset, "UTF-8//TRANSLIT", $decodedString); + + $output .= self::cleanFilename($decodedString); if (!empty($nextSection)) { - $output .= self::processSubject($nextSection); + $output .= self::processFilename($nextSection); } return $output; @@ -527,12 +543,8 @@ protected function processSubject($subject) { return $output; } - protected function cleanFilename($charset, $rawName) { - // Strip special chars from filename - $sName = preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $rawName); - // Transliterate accented chars to un-accented equivalents - $stName = iconv($charset, "iso-8859-1//TRANSLIT", $sName); - return $stName; + protected function cleanFilename($oldName) { + return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); } /** @@ -569,7 +581,7 @@ protected function processBody($parameters, $structure, $partIdentifier) { */ protected function processStructure($structure, $partIdentifier = null) { - $parameters = self::getParametersFromStructure($structure); + $parameters = self::getParametersFromStructure($structure); $attached = false; // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" @@ -599,7 +611,7 @@ protected function processStructure($structure, $partIdentifier = null) $this->htmlMessage .= $messageBody; } - + if (isset($structure->parts)) { // multipart: iterate through each part foreach ($structure->parts as $partIndex => $part) { $partId = $partIndex + 1; @@ -684,14 +696,18 @@ public static function typeIdToString($id) */ public static function getParametersFromStructure($structure) { - $parameters = array(); - if (isset($structure->parameters)) - foreach ($structure->parameters as $parameter) - $parameters[strtolower($parameter->attribute)] = $parameter->value; - - if (isset($structure->dparameters)) - foreach ($structure->dparameters as $parameter) - $parameters[strtolower($parameter->attribute)] = $parameter->value; + $parameters = array(); + if (isset($structure->parameters)) { + foreach ($structure->parameters as $parameter) { + $parameters[strtolower($parameter->attribute)] = $parameter->value; + } + } + + if (isset($structure->dparameters)) { + foreach ($structure->dparameters as $parameter) { + $parameters[strtolower($parameter->attribute)] = $parameter->value; + } + } return $parameters; } From ebce9b899bb537b789ebac933c9cdd6596dc73cc Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 12 May 2014 18:16:18 -0500 Subject: [PATCH 22/33] syntax error --- src/Fetch/Message.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 7991838..c774f45 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -265,7 +265,7 @@ protected function loadMessage() // multipart foreach ($structure->parts as $id => $part) { if (!empty($part->description)) { - $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description); + $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description)); $part->description = $cleanFilename; foreach($part->parameters as $key => $parameter) { if ($parameter->attribute === "name") { From 3b7398e32388938ec3968c0f514e8c8ac5b090b0 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 12 May 2014 18:27:51 -0500 Subject: [PATCH 23/33] Fixing bugs reported by scrutinizer ('The class Fetch\Exception does not exist' and 'It seems like can also be type ; however, does only seem to accept , maybe add an additional type check?'). --- src/Fetch/Message.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index c774f45..2def061 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -488,7 +488,7 @@ protected function addAttachment($parameters, $structure, $partIdentifier) $attachment = new Attachment($this, $structure, $partIdentifier); $this->attachments[] = $attachment; return true; - } catch (Exception $e) { + } catch (\Exception $e) { return false; } } @@ -507,7 +507,7 @@ protected function processFilename($subject) { $encodingMatches = array(); preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?=\s*(.*)/', $subject, $encodingMatches); - if (count($encodingMatches) > 3) { + if (is_array($encodingMatches) && count($encodingMatches) > 3) { array_shift($encodingMatches); // remove input $charset = array_shift($encodingMatches); // remove charset $encoding = array_shift($encodingMatches); From 1672250612d0979a05aa045c5ba1fb0a9d6119bd Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Mon, 12 May 2014 19:18:42 -0500 Subject: [PATCH 24/33] Converted tabs to 4 spaces. Moved opening braces to the line after the function declaration. --- src/Fetch/Message.php | 339 +++++++++++++++++++++--------------------- 1 file changed, 171 insertions(+), 168 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 2def061..009b3db 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -21,19 +21,19 @@ class Message { /** - * Primary Body Types - * According to http://www.php.net/manual/en/function.imap-fetchstructure.php - */ - const TYPE_TEXT = 0; - const TYPE_MULTIPART = 1; - const TYPE_MESSAGE = 2; - const TYPE_APPLICATION = 3; - const TYPE_AUDIO = 4; - const TYPE_IMAGE = 5; - const TYPE_VIDEO = 6; - const TYPE_OTHER = 7; - - /** + * Primary Body Types + * According to http://www.php.net/manual/en/function.imap-fetchstructure.php + */ + const TYPE_TEXT = 0; + const TYPE_MULTIPART = 1; + const TYPE_MESSAGE = 2; + const TYPE_APPLICATION = 3; + const TYPE_AUDIO = 4; + const TYPE_IMAGE = 5; + const TYPE_VIDEO = 6; + const TYPE_OTHER = 7; + + /** * This is the connection/mailbox class that the email came from. * * @var Server @@ -225,7 +225,7 @@ public function __construct($messageUniqueId, Server $connection) */ protected function loadMessage() { - /* First load the message overview information */ + /* First load the message overview information */ if(!is_object($messageOverview = $this->getOverview())) @@ -264,23 +264,23 @@ protected function loadMessage() } else { // multipart foreach ($structure->parts as $id => $part) { - if (!empty($part->description)) { - $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description)); - $part->description = $cleanFilename; - foreach($part->parameters as $key => $parameter) { - if ($parameter->attribute === "name") { - $part->parameters[$key]->value = $cleanFilename; - } - } - foreach($part->dparameters as $key => $dparameter) { - if ($dparameter->attribute === "filename") { - $part->dparameters[$key]->value = $cleanFilename; - } - } - } - + if (!empty($part->description)) { + $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description)); + $part->description = $cleanFilename; + foreach($part->parameters as $key => $parameter) { + if ($parameter->attribute === "name") { + $part->parameters[$key]->value = $cleanFilename; + } + } + foreach($part->dparameters as $key => $dparameter) { + if ($dparameter->attribute === "filename") { + $part->dparameters[$key]->value = $cleanFilename; + } + } + } + $this->processStructure($part, $id + 1); - } + } } return true; @@ -460,117 +460,120 @@ public function getImapBox() { return $this->imapConnection; } - - /** - * Adds an attachment - * - * @param array $parameters - * @param \stdClass $structure - * @param string $partIdentifier - * - * @return boolean Successful attachment of file - */ - protected function addAttachment($parameters, $structure, $partIdentifier) - { - // make up a filename if none is provided (like Gmail and desktop clients do) - if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { - $subjectMatches = array(); - preg_match('/Subject:\s?(.*?)(?=\s*Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); - $filename = !empty($subjectMatches[1]) ? self::processFilename($subjectMatches[1]) : "email"; - - $dpar = new \stdClass(); - $dpar->attribute = "filename"; - $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; - $structure->dparameters[] = $dpar; - } - - try { - $attachment = new Attachment($this, $structure, $partIdentifier); - $this->attachments[] = $attachment; - return true; - } catch (\Exception $e) { - return false; - } - } - - /** - * Decodes the email subject line string passed to it - * Designed to handle subject lines with special characters encoded in Base64 or Quoted-Printable - * - * @param string $subject subject line to be processed and/or decoded - * - * @return string decoded subject line - */ - protected function processFilename($subject) { - $output = ""; - - $encodingMatches = array(); - preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?=\s*(.*)/', $subject, $encodingMatches); - - if (is_array($encodingMatches) && count($encodingMatches) > 3) { - array_shift($encodingMatches); // remove input - $charset = array_shift($encodingMatches); // remove charset - $encoding = array_shift($encodingMatches); - $encodedString = array_shift($encodingMatches); - $nextSection = array_shift($encodingMatches); - - switch ($encoding) { - case "Q": // Quoted-Printable - $decodedString = quoted_printable_decode($encodedString); - break; - case "B": // Base64 - $decodedString = base64_decode($encodedString); - break; - default: - $decodedString = ""; - } - - $decodedString = iconv($charset, "UTF-8//TRANSLIT", $decodedString); - - $output .= self::cleanFilename($decodedString); - - if (!empty($nextSection)) { - $output .= self::processFilename($nextSection); - } - - return $output; - } else if (count($encodingMatches) > 0) { - return $output . $encodingMatches[0]; - } else if (empty($encodingMatches)) { - return $subject; - } - - return $output; - } - - protected function cleanFilename($oldName) { - return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); - } - - /** - * This function extracts the body of an email part, decodes it, - * converts it to the charset of the parent message, and returns the result. - * - * @param array $parameters - * @param \stdClass $structure - * @param string $partIdentifier - * - * @return string - */ - protected function processBody($parameters, $structure, $partIdentifier) { - $messageBody = isset($partIdentifier) ? - imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) - : imap_body($this->imapStream, $this->uid, FT_UID); - - $messageBody = self::decode($messageBody, $structure->encoding); - - if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { + + /** + * Adds an attachment + * + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * + * @return boolean Successful attachment of file + */ + protected function addAttachment($parameters, $structure, $partIdentifier) + { + // make up a filename if none is provided (like Gmail and desktop clients do) + if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { + $subjectMatches = array(); + preg_match('/Subject:\s?(.*?)(?=\s*Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); + $filename = !empty($subjectMatches[1]) ? self::processFilename($subjectMatches[1]) : "email"; + + $dpar = new \stdClass(); + $dpar->attribute = "filename"; + $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; + $structure->dparameters[] = $dpar; + } + + try { + $attachment = new Attachment($this, $structure, $partIdentifier); + $this->attachments[] = $attachment; + return true; + } catch (\Exception $e) { + return false; + } + } + + /** + * Decodes the email subject line string passed to it + * Designed to handle subject lines with special characters encoded in Base64 or Quoted-Printable + * + * @param string $subject subject line to be processed and/or decoded + * + * @return string decoded subject line + */ + protected function processFilename($subject) + { + $output = ""; + + $encodingMatches = array(); + preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?=\s*(.*)/', $subject, $encodingMatches); + + if (is_array($encodingMatches) && count($encodingMatches) > 3) { + array_shift($encodingMatches); // remove input + $charset = array_shift($encodingMatches); // remove charset + $encoding = array_shift($encodingMatches); + $encodedString = array_shift($encodingMatches); + $nextSection = array_shift($encodingMatches); + + switch ($encoding) { + case "Q": // Quoted-Printable + $decodedString = quoted_printable_decode($encodedString); + break; + case "B": // Base64 + $decodedString = base64_decode($encodedString); + break; + default: + $decodedString = ""; + } + + $decodedString = iconv($charset, "UTF-8//TRANSLIT", $decodedString); + + $output .= self::cleanFilename($decodedString); + + if (!empty($nextSection)) { + $output .= self::processFilename($nextSection); + } + + return $output; + } else if (count($encodingMatches) > 0) { + return $output . $encodingMatches[0]; + } else if (empty($encodingMatches)) { + return $subject; + } + + return $output; + } + + protected function cleanFilename($oldName) + { + return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); + } + + /** + * This function extracts the body of an email part, decodes it, + * converts it to the charset of the parent message, and returns the result. + * + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * + * @return string + */ + protected function processBody($parameters, $structure, $partIdentifier) + { + $messageBody = isset($partIdentifier) ? + imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) + : imap_body($this->imapStream, $this->uid, FT_UID); + + $messageBody = self::decode($messageBody, $structure->encoding); + + if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { // TODO: ERROR HERE!!! - $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); - } - - return $messageBody; - } + $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); + } + + return $messageBody; + } /** * This function takes in a structure and identifier and processes that part of the message. If that portion of the @@ -581,15 +584,15 @@ protected function processBody($parameters, $structure, $partIdentifier) { */ protected function processStructure($structure, $partIdentifier = null) { - $parameters = self::getParametersFromStructure($structure); - $attached = false; - - // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" - - if ((isset($structure->disposition) && $structure->disposition == "attachment") && - !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { - $attached = self::addAttachment($parameters, $structure, $partIdentifier); - } + $parameters = self::getParametersFromStructure($structure); + $attached = false; + + // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" + + if ((isset($structure->disposition) && $structure->disposition == "attachment") && + !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { + $attached = self::addAttachment($parameters, $structure, $partIdentifier); + } if (!$attached && ($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { $messageBody = self::processBody($parameters, $structure, $partIdentifier); @@ -611,18 +614,18 @@ protected function processStructure($structure, $partIdentifier = null) $this->htmlMessage .= $messageBody; } - - if (isset($structure->parts)) { // multipart: iterate through each part - foreach ($structure->parts as $partIndex => $part) { - $partId = $partIndex + 1; - - if (isset($partIdentifier)) - $partId = $partIdentifier . '.' . $partId; - - $this->processStructure($part, $partId); - } - } - } + + if (isset($structure->parts)) { // multipart: iterate through each part + foreach ($structure->parts as $partIndex => $part) { + $partId = $partIndex + 1; + + if (isset($partIdentifier)) + $partId = $partIdentifier . '.' . $partId; + + $this->processStructure($part, $partId); + } + } + } } /** @@ -696,18 +699,18 @@ public static function typeIdToString($id) */ public static function getParametersFromStructure($structure) { - $parameters = array(); + $parameters = array(); if (isset($structure->parameters)) { foreach ($structure->parameters as $parameter) { - $parameters[strtolower($parameter->attribute)] = $parameter->value; - } - } + $parameters[strtolower($parameter->attribute)] = $parameter->value; + } + } if (isset($structure->dparameters)) { foreach ($structure->dparameters as $parameter) { - $parameters[strtolower($parameter->attribute)] = $parameter->value; - } - } + $parameters[strtolower($parameter->attribute)] = $parameter->value; + } + } return $parameters; } From 3540360ac0d1f5b211912ddd23a3aa61ed46b619 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Tue, 13 May 2014 10:45:39 -0500 Subject: [PATCH 25/33] Ran PHP-CS-Fixer. --- src/Fetch/Message.php | 79 +++++++++++++++++-------------------------- src/Fetch/Server.php | 3 +- tests/bootstrap.php | 2 +- 3 files changed, 34 insertions(+), 50 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 009b3db..a8acbcd 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -32,7 +32,7 @@ class Message const TYPE_IMAGE = 5; const TYPE_VIDEO = 6; const TYPE_OTHER = 7; - + /** * This is the connection/mailbox class that the email came from. * @@ -182,24 +182,6 @@ class Message */ public static $charset = 'UTF-8'; - /** - * This value defines the flag set for encoding if the mb_convert_encoding - * function can't be found, and in this case iconv encoding will be used. - * - * @var string - */ - public static $charsetFlag = '//TRANSLIT'; - - /** - * These constants can be used to easily access available flags - */ - const FLAG_RECENT = 'recent'; - const FLAG_FLAGGED = 'flagged'; - const FLAG_ANSWERED = 'answered'; - const FLAG_DELETED = 'deleted'; - const FLAG_SEEN = 'seen'; - const FLAG_DRAFT = 'draft'; - /** * This constructor takes in the uid for the message and the Imap class representing the mailbox the * message should be opened from. This constructor should generally not be called directly, but rather retrieved @@ -267,18 +249,18 @@ protected function loadMessage() if (!empty($part->description)) { $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description)); $part->description = $cleanFilename; - foreach($part->parameters as $key => $parameter) { + foreach ($part->parameters as $key => $parameter) { if ($parameter->attribute === "name") { $part->parameters[$key]->value = $cleanFilename; } } - foreach($part->dparameters as $key => $dparameter) { + foreach ($part->dparameters as $key => $dparameter) { if ($dparameter->attribute === "filename") { $part->dparameters[$key]->value = $cleanFilename; } } } - + $this->processStructure($part, $id + 1); } } @@ -460,14 +442,14 @@ public function getImapBox() { return $this->imapConnection; } - + /** * Adds an attachment - * - * @param array $parameters + * + * @param array $parameters * @param \stdClass $structure - * @param string $partIdentifier - * + * @param string $partIdentifier + * * @return boolean Successful attachment of file */ protected function addAttachment($parameters, $structure, $partIdentifier) @@ -477,28 +459,29 @@ protected function addAttachment($parameters, $structure, $partIdentifier) $subjectMatches = array(); preg_match('/Subject:\s?(.*?)(?=\s*Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); $filename = !empty($subjectMatches[1]) ? self::processFilename($subjectMatches[1]) : "email"; - + $dpar = new \stdClass(); $dpar->attribute = "filename"; $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; $structure->dparameters[] = $dpar; } - + try { $attachment = new Attachment($this, $structure, $partIdentifier); $this->attachments[] = $attachment; + return true; } catch (\Exception $e) { return false; } } - + /** * Decodes the email subject line string passed to it * Designed to handle subject lines with special characters encoded in Base64 or Quoted-Printable - * + * * @param string $subject subject line to be processed and/or decoded - * + * * @return string decoded subject line */ protected function processFilename($subject) @@ -525,9 +508,9 @@ protected function processFilename($subject) default: $decodedString = ""; } - + $decodedString = iconv($charset, "UTF-8//TRANSLIT", $decodedString); - + $output .= self::cleanFilename($decodedString); if (!empty($nextSection)) { @@ -535,28 +518,28 @@ protected function processFilename($subject) } return $output; - } else if (count($encodingMatches) > 0) { + } elseif (count($encodingMatches) > 0) { return $output . $encodingMatches[0]; - } else if (empty($encodingMatches)) { + } elseif (empty($encodingMatches)) { return $subject; } return $output; } - + protected function cleanFilename($oldName) { return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); } - + /** - * This function extracts the body of an email part, decodes it, + * This function extracts the body of an email part, decodes it, * converts it to the charset of the parent message, and returns the result. - * - * @param array $parameters + * + * @param array $parameters * @param \stdClass $structure - * @param string $partIdentifier - * + * @param string $partIdentifier + * * @return string */ protected function processBody($parameters, $structure, $partIdentifier) @@ -571,7 +554,7 @@ protected function processBody($parameters, $structure, $partIdentifier) // TODO: ERROR HERE!!! $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); } - + return $messageBody; } @@ -586,10 +569,10 @@ protected function processStructure($structure, $partIdentifier = null) { $parameters = self::getParametersFromStructure($structure); $attached = false; - + // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" - - if ((isset($structure->disposition) && $structure->disposition == "attachment") && + + if ((isset($structure->disposition) && $structure->disposition == "attachment") && !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { $attached = self::addAttachment($parameters, $structure, $partIdentifier); } @@ -614,7 +597,7 @@ protected function processStructure($structure, $partIdentifier = null) $this->htmlMessage .= $messageBody; } - + if (isset($structure->parts)) { // multipart: iterate through each part foreach ($structure->parts as $partIndex => $part) { $partId = $partIndex + 1; diff --git a/src/Fetch/Server.php b/src/Fetch/Server.php index 4fca523..05cb30c 100644 --- a/src/Fetch/Server.php +++ b/src/Fetch/Server.php @@ -155,7 +155,8 @@ public function setAuthentication($username, $password) */ public function setMailBox($mailbox = '') { - if (!$this->hasMailBox($mailbox)) { + if(!$this->hasMailBox($mailbox)) + return false; } diff --git a/tests/bootstrap.php b/tests/bootstrap.php index c656c67..3548183 100644 --- a/tests/bootstrap.php +++ b/tests/bootstrap.php @@ -35,6 +35,6 @@ $filename = __DIR__ .'/../autoload.php'; require_once $filename; } else { - $loader = require $filename; + $loader = require_once $filename; $loader->add('Fetch\\Test', __DIR__); } From a4c98f6b5b2c7c7cffc703241a137e4a3ac214f7 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Tue, 13 May 2014 17:18:40 -0500 Subject: [PATCH 26/33] Subject-to-Filename parsing got confused by DKIM-Signature section of signed email. Changed regex to be more specific and resilient. --- src/Fetch/Message.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index a8acbcd..3622521 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -457,8 +457,8 @@ protected function addAttachment($parameters, $structure, $partIdentifier) // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { $subjectMatches = array(); - preg_match('/Subject:\s?(.*?)(?=\s*Thread-Topic:|$)/', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); - $filename = !empty($subjectMatches[1]) ? self::processFilename($subjectMatches[1]) : "email"; + preg_match('/^Subject:\s?([^\n]*)/m', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); + $filename = !empty($subjectMatches[1]) ? trim(self::processFilename($subjectMatches[1])) : "email"; $dpar = new \stdClass(); $dpar->attribute = "filename"; From a44a856c4a22eb958e7cfd0de222f874c18be5fd Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Tue, 13 May 2014 17:30:26 -0500 Subject: [PATCH 27/33] My IDE lost its tabs --> spaces setting, and ended up putting tabs in again. Replaced all tabs with spaces. --- src/Fetch/Message.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 3622521..7882427 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -457,7 +457,7 @@ protected function addAttachment($parameters, $structure, $partIdentifier) // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { $subjectMatches = array(); - preg_match('/^Subject:\s?([^\n]*)/m', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); + preg_match('/^Subject:\s?([^\n]*)/m', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); $filename = !empty($subjectMatches[1]) ? trim(self::processFilename($subjectMatches[1])) : "email"; $dpar = new \stdClass(); From 1143d4c059bfb27001d66b3f661191412ed27b8c Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Tue, 20 May 2014 16:45:01 -0500 Subject: [PATCH 28/33] Made some changes to the processing of email contents and encoded Subject lines. --- src/Fetch/Message.php | 157 ++++++++++++++++++++++++++---------------- 1 file changed, 99 insertions(+), 58 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 7882427..817de28 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -247,7 +247,7 @@ protected function loadMessage() // multipart foreach ($structure->parts as $id => $part) { if (!empty($part->description)) { - $cleanFilename = self::processFilename(preg_replace('/_/', " ", $part->description)); + $cleanFilename = $this->makeFilenameSafe($part->description); $part->description = $cleanFilename; foreach ($part->parameters as $key => $parameter) { if ($parameter->attribute === "name") { @@ -449,7 +449,6 @@ public function getImapBox() * @param array $parameters * @param \stdClass $structure * @param string $partIdentifier - * * @return boolean Successful attachment of file */ protected function addAttachment($parameters, $structure, $partIdentifier) @@ -458,7 +457,7 @@ protected function addAttachment($parameters, $structure, $partIdentifier) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { $subjectMatches = array(); preg_match('/^Subject:\s?([^\n]*)/m', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); - $filename = !empty($subjectMatches[1]) ? trim(self::processFilename($subjectMatches[1])) : "email"; + $filename = !empty($subjectMatches[1]) ? trim($subjectMatches[1]) : "email"; $dpar = new \stdClass(); $dpar->attribute = "filename"; @@ -477,26 +476,86 @@ protected function addAttachment($parameters, $structure, $partIdentifier) } /** - * Decodes the email subject line string passed to it - * Designed to handle subject lines with special characters encoded in Base64 or Quoted-Printable - * - * @param string $subject subject line to be processed and/or decoded + * This function extracts the body of an email part, strips harmful + * Outlook-specific strings from it, processes any encoded one-liners, + * decodes it, converts it to the charset of the parent message, and + * returns the result. * - * @return string decoded subject line + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * @return string */ - protected function processFilename($subject) + protected function processBody($parameters, $structure, $partIdentifier) { - $output = ""; + $messageBody = isset($partIdentifier) ? + imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) + : imap_body($this->imapStream, $this->uid, FT_UID); + + $messageBody = $this->stripOutlookSpecificStrings($messageBody); + + $messageBody = $this->processEncodedSubject($messageBody); + + $messageBody = self::decode($messageBody, $structure->encoding); - $encodingMatches = array(); - preg_match('/=\?(.[^?]*)\?([BQ])\?(.[^?]*)\?=\s*(.*)/', $subject, $encodingMatches); + if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { + $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); + } - if (is_array($encodingMatches) && count($encodingMatches) > 3) { - array_shift($encodingMatches); // remove input - $charset = array_shift($encodingMatches); // remove charset - $encoding = array_shift($encodingMatches); - $encodedString = array_shift($encodingMatches); - $nextSection = array_shift($encodingMatches); + return $messageBody; + } + + /** + * Removes "Thread-Topic:" and "Thread-Index:" lines from the message body + * which are placed there by Outlook and mess up the other processing steps + * + * @param string $messageBody + * @return string + */ + protected function stripOutlookSpecificStrings($messageBody) + { + $messageBody = preg_replace('/Thread-Topic:.*$/m', "", $messageBody); + $messageBody = preg_replace('/Thread-Index:.*$/m', "", $messageBody); + + return $messageBody; + } + + /** + * Grabs the encoded strings (usually subject line) from the string passed + * to it, and passes them to decodeSubject() for processing, then replaces + * them in the original string, before returning the modified string + * + * @param string $haystack + * @return string + */ + protected function processEncodedSubject($haystack) + { + $haystack = preg_replace_callback('/=\?([^?]*)\?([^?])\?([^?]*)\?=(.*)$/m', function($encodedStrings) + { + return $this->decodeSubject($encodedStrings); + }, $haystack); + + return $haystack; + } + + /** + * Decodes the email subject line array passed to it. It is designed + * to handle subject lines with special characters encoded in Base64 or + * Quoted-Printable in "=?charset?encoding?content?=" format + * + * @param array $encodedStrings + * @return string + */ + protected function decodeSubject($encodedStrings) + { + $output = ""; + + if (is_array($encodedStrings) && count($encodedStrings) > 3) { + $subject = array_shift($encodedStrings); // remove input + $charset = array_shift($encodedStrings); // remove charset + $encoding = array_shift($encodedStrings); + $encodedString = array_shift($encodedStrings); + $nextSection = array_shift($encodedStrings); switch ($encoding) { case "Q": // Quoted-Printable @@ -509,55 +568,39 @@ protected function processFilename($subject) $decodedString = ""; } - $decodedString = iconv($charset, "UTF-8//TRANSLIT", $decodedString); + $decodedString = iconv($charset, self::$charset, $decodedString); - $output .= self::cleanFilename($decodedString); + $output .= $this->makeFilenameSafe($decodedString); - if (!empty($nextSection)) { - $output .= self::processFilename($nextSection); + $test = preg_replace('/\s*/', "", $nextSection); + $test = trim($test); + if ($test != "") { + $output .= $this->processEncodedSubject($nextSection); } return $output; - } elseif (count($encodingMatches) > 0) { - return $output . $encodingMatches[0]; - } elseif (empty($encodingMatches)) { + } elseif (count($encodedStrings) > 0) { + return $output . $encodedStrings[0]; + } elseif (empty($encodedStrings)) { return $subject; } return $output; } - - protected function cleanFilename($oldName) - { - return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); - } - + /** - * This function extracts the body of an email part, decodes it, - * converts it to the charset of the parent message, and returns the result. - * - * @param array $parameters - * @param \stdClass $structure - * @param string $partIdentifier - * + * This function takes in a string to be used as a filename and replaces + * any dangerous characters with underscores to ensure compatibility with + * various file systems + * + * @param string $oldName * @return string */ - protected function processBody($parameters, $structure, $partIdentifier) + protected function makeFilenameSafe($oldName) { - $messageBody = isset($partIdentifier) ? - imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) - : imap_body($this->imapStream, $this->uid, FT_UID); - - $messageBody = self::decode($messageBody, $structure->encoding); - - if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { -// TODO: ERROR HERE!!! - $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); - } - - return $messageBody; + return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); } - + /** * This function takes in a structure and identifier and processes that part of the message. If that portion of the * message has its own subparts, those are recursively processed using this function. @@ -569,16 +612,14 @@ protected function processStructure($structure, $partIdentifier = null) { $parameters = self::getParametersFromStructure($structure); $attached = false; - - // TODO: Process HTML files similarly to .eml files -- prevent them from becoming merged into the main email if their disposition is "attachment" - + if ((isset($structure->disposition) && $structure->disposition == "attachment") && !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { - $attached = self::addAttachment($parameters, $structure, $partIdentifier); + $attached = $this->addAttachment($parameters, $structure, $partIdentifier); } if (!$attached && ($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { - $messageBody = self::processBody($parameters, $structure, $partIdentifier); + $messageBody = $this->processBody($parameters, $structure, $partIdentifier); if (strtolower($structure->subtype) === 'plain' || ($structure->type == self::TYPE_MULTIPART && strtolower($structure->subtype) !== 'alternative')) { if (isset($this->plaintextMessage)) { @@ -637,7 +678,7 @@ public static function decode($data, $encoding) return $data; } } - + /** * This function returns the body type that an imap integer maps to. * From 79d96ffb1fa2aba63600ffe4da750a678e6464d7 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Fri, 23 May 2014 17:20:04 -0500 Subject: [PATCH 29/33] Simplified encoded Subject-line processing into filename. Skip processing on message/rfc822 attachments in order to avoid mangling the file. --- src/Fetch/Attachment.php | 18 +++-- src/Fetch/Message.php | 141 +++++++++++++-------------------------- 2 files changed, 61 insertions(+), 98 deletions(-) diff --git a/src/Fetch/Attachment.php b/src/Fetch/Attachment.php index 1353571..71332b1 100644 --- a/src/Fetch/Attachment.php +++ b/src/Fetch/Attachment.php @@ -109,20 +109,28 @@ public function __construct(Message $message, $structure, $partIdentifier = null } /** - * This function returns the data of the attachment. Combined with getMimeType() it can be used to directly output - * data to a browser. + * This function returns the data of the attachment. Combined with + * getMimeType() it can be used to directly output data to a browser. + * + * If the attachment file is message/rfc822, skip processing/decoding the + * contents in order to avoid mangling the file. Otherwise, decode as + * normal to ensure other files are handled correctly. * * @return string */ public function getData() { if (!isset($this->data)) { - $messageBody = isset($this->partId) ? + $rawBody = isset($this->partId) ? imap_fetchbody($this->imapStream, $this->messageId, $this->partId, FT_UID) : imap_body($this->imapStream, $this->messageId, FT_UID); - $messageBody = Message::decode($messageBody, $this->encoding); - $this->data = $messageBody; + if (strpos(strtolower($this->mimeType), "rfc822") !== false) { + $this->data = $rawBody; + } else { + $decodedBody = Message::decode($rawBody, $this->encoding); + $this->data = $decodedBody; + } } return $this->data; diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 817de28..5dce05b 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -445,6 +445,11 @@ public function getImapBox() /** * Adds an attachment + * + * If a filename is not provided and the attachment is a message/rfc822 + * email, parse the Subject line and use it as the filename. If the Subject + * line is blank or illegible, use a default filename (like Gmail and some + * desktop clients do) * * @param array $parameters * @param \stdClass $structure @@ -453,12 +458,14 @@ public function getImapBox() */ protected function addAttachment($parameters, $structure, $partIdentifier) { - // make up a filename if none is provided (like Gmail and desktop clients do) if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { - $subjectMatches = array(); - preg_match('/^Subject:\s?([^\n]*)/m', self::processBody($parameters, $structure, $partIdentifier), $subjectMatches); - $filename = !empty($subjectMatches[1]) ? trim($subjectMatches[1]) : "email"; - + $body = isset($partIdentifier) ? + imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) + : imap_body($this->imapStream, $this->uid, FT_UID); + + $headers = iconv_mime_decode_headers($body, 0, self::$charset); + $filename = !empty($headers["Subject"]) ? $this->makeFilenameSafe($headers["Subject"]) : "email"; + $dpar = new \stdClass(); $dpar->attribute = "filename"; $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; @@ -486,106 +493,53 @@ protected function addAttachment($parameters, $structure, $partIdentifier) * @param string $partIdentifier * @return string */ - protected function processBody($parameters, $structure, $partIdentifier) + protected function processBody($structure, $partIdentifier) { - $messageBody = isset($partIdentifier) ? - imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) - : imap_body($this->imapStream, $this->uid, FT_UID); + $rawBody = isset($partIdentifier) ? + imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) + : imap_body($this->imapStream, $this->uid, FT_UID); - $messageBody = $this->stripOutlookSpecificStrings($messageBody); + $bodyNoOutlook = $this->stripOutlookSpecificStrings($rawBody); - $messageBody = $this->processEncodedSubject($messageBody); + $decodedBody = self::decode($bodyNoOutlook, $structure->encoding); - $messageBody = self::decode($messageBody, $structure->encoding); - - if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { - $messageBody = iconv($parameters['charset'], self::$charset, $messageBody); + $inCharset = $inCharset = mb_detect_encoding($decodedBody, array( + "US-ASCII", + "ISO-8859-1", + "UTF-8", + "UTF-7", + "ASCII", + "EUC-JP", + "SJIS", + "eucJP-win", + "SJIS-win", + "JIS", + "ISO-2022-JP", + "UTF-16", + "UTF-32", + "UCS2", + "UCS4") + ); + + if ($inCharset && $inCharset !== self::$charset) { + $decodedBody = iconv($inCharset, self::$charset, $decodedBody); } - return $messageBody; + return $decodedBody; } /** - * Removes "Thread-Topic:" and "Thread-Index:" lines from the message body - * which are placed there by Outlook and mess up the other processing steps + * Removes "Thread-Index:" line from the message body which is placed there + * by Outlook and messes up the other processing steps. * * @param string $messageBody * @return string */ - protected function stripOutlookSpecificStrings($messageBody) - { - $messageBody = preg_replace('/Thread-Topic:.*$/m', "", $messageBody); - $messageBody = preg_replace('/Thread-Index:.*$/m', "", $messageBody); - - return $messageBody; - } - - /** - * Grabs the encoded strings (usually subject line) from the string passed - * to it, and passes them to decodeSubject() for processing, then replaces - * them in the original string, before returning the modified string - * - * @param string $haystack - * @return string - */ - protected function processEncodedSubject($haystack) - { - $haystack = preg_replace_callback('/=\?([^?]*)\?([^?])\?([^?]*)\?=(.*)$/m', function($encodedStrings) - { - return $this->decodeSubject($encodedStrings); - }, $haystack); - - return $haystack; - } - - /** - * Decodes the email subject line array passed to it. It is designed - * to handle subject lines with special characters encoded in Base64 or - * Quoted-Printable in "=?charset?encoding?content?=" format - * - * @param array $encodedStrings - * @return string - */ - protected function decodeSubject($encodedStrings) + protected function stripOutlookSpecificStrings($bodyBefore) { - $output = ""; + $bodyAfter = preg_replace('/Thread-Index:.*$/m', "", $bodyBefore); - if (is_array($encodedStrings) && count($encodedStrings) > 3) { - $subject = array_shift($encodedStrings); // remove input - $charset = array_shift($encodedStrings); // remove charset - $encoding = array_shift($encodedStrings); - $encodedString = array_shift($encodedStrings); - $nextSection = array_shift($encodedStrings); - - switch ($encoding) { - case "Q": // Quoted-Printable - $decodedString = quoted_printable_decode($encodedString); - break; - case "B": // Base64 - $decodedString = base64_decode($encodedString); - break; - default: - $decodedString = ""; - } - - $decodedString = iconv($charset, self::$charset, $decodedString); - - $output .= $this->makeFilenameSafe($decodedString); - - $test = preg_replace('/\s*/', "", $nextSection); - $test = trim($test); - if ($test != "") { - $output .= $this->processEncodedSubject($nextSection); - } - - return $output; - } elseif (count($encodedStrings) > 0) { - return $output . $encodedStrings[0]; - } elseif (empty($encodedStrings)) { - return $subject; - } - - return $output; + return $bodyAfter; } /** @@ -598,7 +552,7 @@ protected function decodeSubject($encodedStrings) */ protected function makeFilenameSafe($oldName) { - return preg_replace('/[<>#%"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); + return preg_replace('/[<>"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); } /** @@ -610,16 +564,17 @@ protected function makeFilenameSafe($oldName) */ protected function processStructure($structure, $partIdentifier = null) { - $parameters = self::getParametersFromStructure($structure); $attached = false; + // TODO: Get HTML attachments working, too! if ((isset($structure->disposition) && $structure->disposition == "attachment") && !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { + $parameters = self::getParametersFromStructure($structure); $attached = $this->addAttachment($parameters, $structure, $partIdentifier); } if (!$attached && ($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { - $messageBody = $this->processBody($parameters, $structure, $partIdentifier); + $messageBody = $this->processBody($structure, $partIdentifier); if (strtolower($structure->subtype) === 'plain' || ($structure->type == self::TYPE_MULTIPART && strtolower($structure->subtype) !== 'alternative')) { if (isset($this->plaintextMessage)) { From 15e51600c4e85387a751414978c3864d7cfafd7a Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Tue, 27 May 2014 12:19:16 -0500 Subject: [PATCH 30/33] The code was still causing attached HTML files to be inlined. Now anything with a disposition of 'attachment' will be added to the array rather than inlined. --- src/Fetch/Message.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 5dce05b..9a878a0 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -567,8 +567,7 @@ protected function processStructure($structure, $partIdentifier = null) $attached = false; // TODO: Get HTML attachments working, too! - if ((isset($structure->disposition) && $structure->disposition == "attachment") && - !($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { + if (isset($structure->disposition) && $structure->disposition == "attachment") { $parameters = self::getParametersFromStructure($structure); $attached = $this->addAttachment($parameters, $structure, $partIdentifier); } From 27c1c88f33df91c9307fb656f235fc09171c5549 Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Wed, 30 Jul 2014 00:51:28 -0500 Subject: [PATCH 31/33] Travis build failed on bootstrap.php because of a change my push/merge/rebase apparently enveloped. --- tests/bootstrap.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/bootstrap.php b/tests/bootstrap.php index 3548183..c656c67 100644 --- a/tests/bootstrap.php +++ b/tests/bootstrap.php @@ -35,6 +35,6 @@ $filename = __DIR__ .'/../autoload.php'; require_once $filename; } else { - $loader = require_once $filename; + $loader = require $filename; $loader->add('Fetch\\Test', __DIR__); } From b6fa0f4291f17a39fe47beb81900234d0946f2ea Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Wed, 30 Jul 2014 00:58:46 -0500 Subject: [PATCH 32/33] Fixing another error which I don't recall causing. --- src/Fetch/Server.php | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Fetch/Server.php b/src/Fetch/Server.php index 05cb30c..be42130 100644 --- a/src/Fetch/Server.php +++ b/src/Fetch/Server.php @@ -155,13 +155,11 @@ public function setAuthentication($username, $password) */ public function setMailBox($mailbox = '') { - if(!$this->hasMailBox($mailbox)) + if(!$this->hasMailBox($mailbox)) { return false; } - - $this->mailbox = $mailbox; if (isset($this->imapStream)) { $this->setImapStream(); From 0c3f751c4df0d51f9f88546ef24b6867dadf83aa Mon Sep 17 00:00:00 2001 From: AdrianTP Date: Wed, 30 Jul 2014 01:03:08 -0500 Subject: [PATCH 33/33] Ran PHP-CS-Fixer again. --- src/Fetch/Attachment.php | 8 ++--- src/Fetch/Message.php | 66 ++++++++++++++++++++-------------------- src/Fetch/Server.php | 3 +- 3 files changed, 38 insertions(+), 39 deletions(-) diff --git a/src/Fetch/Attachment.php b/src/Fetch/Attachment.php index 71332b1..ed3c6ca 100644 --- a/src/Fetch/Attachment.php +++ b/src/Fetch/Attachment.php @@ -109,11 +109,11 @@ public function __construct(Message $message, $structure, $partIdentifier = null } /** - * This function returns the data of the attachment. Combined with + * This function returns the data of the attachment. Combined with * getMimeType() it can be used to directly output data to a browser. - * - * If the attachment file is message/rfc822, skip processing/decoding the - * contents in order to avoid mangling the file. Otherwise, decode as + * + * If the attachment file is message/rfc822, skip processing/decoding the + * contents in order to avoid mangling the file. Otherwise, decode as * normal to ensure other files are handled correctly. * * @return string diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 0270b45..6157684 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -445,16 +445,16 @@ public function getImapBox() /** * Adds an attachment - * - * If a filename is not provided and the attachment is a message/rfc822 - * email, parse the Subject line and use it as the filename. If the Subject - * line is blank or illegible, use a default filename (like Gmail and some + * + * If a filename is not provided and the attachment is a message/rfc822 + * email, parse the Subject line and use it as the filename. If the Subject + * line is blank or illegible, use a default filename (like Gmail and some * desktop clients do) * - * @param array $parameters - * @param \stdClass $structure - * @param string $partIdentifier - * @return boolean Successful attachment of file + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * @return boolean Successful attachment of file */ protected function addAttachment($parameters, $structure, $partIdentifier) { @@ -462,10 +462,10 @@ protected function addAttachment($parameters, $structure, $partIdentifier) $body = isset($partIdentifier) ? imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) : imap_body($this->imapStream, $this->uid, FT_UID); - + $headers = iconv_mime_decode_headers($body, 0, self::$charset); $filename = !empty($headers["Subject"]) ? $this->makeFilenameSafe($headers["Subject"]) : "email"; - + $dpar = new \stdClass(); $dpar->attribute = "filename"; $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; @@ -483,14 +483,14 @@ protected function addAttachment($parameters, $structure, $partIdentifier) } /** - * This function extracts the body of an email part, strips harmful - * Outlook-specific strings from it, processes any encoded one-liners, - * decodes it, converts it to the charset of the parent message, and + * This function extracts the body of an email part, strips harmful + * Outlook-specific strings from it, processes any encoded one-liners, + * decodes it, converts it to the charset of the parent message, and * returns the result. * - * @param array $parameters - * @param \stdClass $structure - * @param string $partIdentifier + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier * @return string */ protected function processBody($structure, $partIdentifier) @@ -498,11 +498,11 @@ protected function processBody($structure, $partIdentifier) $rawBody = isset($partIdentifier) ? imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) : imap_body($this->imapStream, $this->uid, FT_UID); - + $bodyNoOutlook = $this->stripOutlookSpecificStrings($rawBody); - + $decodedBody = self::decode($bodyNoOutlook, $structure->encoding); - + $inCharset = $inCharset = mb_detect_encoding($decodedBody, array( "US-ASCII", "ISO-8859-1", @@ -520,41 +520,41 @@ protected function processBody($structure, $partIdentifier) "UCS2", "UCS4") ); - + if ($inCharset && $inCharset !== self::$charset) { $decodedBody = iconv($inCharset, self::$charset, $decodedBody); } return $decodedBody; } - + /** - * Removes "Thread-Index:" line from the message body which is placed there + * Removes "Thread-Index:" line from the message body which is placed there * by Outlook and messes up the other processing steps. - * - * @param string $messageBody + * + * @param string $messageBody * @return string */ protected function stripOutlookSpecificStrings($bodyBefore) { $bodyAfter = preg_replace('/Thread-Index:.*$/m', "", $bodyBefore); - + return $bodyAfter; } - + /** - * This function takes in a string to be used as a filename and replaces - * any dangerous characters with underscores to ensure compatibility with + * This function takes in a string to be used as a filename and replaces + * any dangerous characters with underscores to ensure compatibility with * various file systems - * - * @param string $oldName + * + * @param string $oldName * @return string */ protected function makeFilenameSafe($oldName) { return preg_replace('/[<>"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); } - + /** * This function takes in a structure and identifier and processes that part of the message. If that portion of the * message has its own subparts, those are recursively processed using this function. @@ -565,7 +565,7 @@ protected function makeFilenameSafe($oldName) protected function processStructure($structure, $partIdentifier = null) { $attached = false; - + // TODO: Get HTML attachments working, too! if (isset($structure->disposition) && $structure->disposition == "attachment") { $parameters = self::getParametersFromStructure($structure); @@ -632,7 +632,7 @@ public static function decode($data, $encoding) return $data; } } - + /** * This function returns the body type that an imap integer maps to. * diff --git a/src/Fetch/Server.php b/src/Fetch/Server.php index be42130..71ccdd8 100644 --- a/src/Fetch/Server.php +++ b/src/Fetch/Server.php @@ -155,8 +155,7 @@ public function setAuthentication($username, $password) */ public function setMailBox($mailbox = '') { - if(!$this->hasMailBox($mailbox)) { - + if (!$this->hasMailBox($mailbox)) { return false; }