From efbddf10eaa08c76ab12b5c902f93312aed1a9a1 Mon Sep 17 00:00:00 2001
From: graureiher
Date: Wed, 8 Feb 2023 09:13:08 +0100
Subject: [PATCH] #2398 Added processing of .zip attachments including files contained

---
Review notes (kept below the three-dash line so they stay out of the commit
message; the line breaks and indentation of this patch were reconstructed
from a whitespace-collapsed copy, so context lines may need
whitespace-tolerant application):

* ApplyTemplate now reads decodedAttachment to EOF with ioutil.ReadAll before
  the switch, but the untouched `default:` branch still returns
  decodedAttachment. For extensions that match no case the caller therefore
  appears to receive an already-drained reader; returning bytes.NewReader(b)
  there would avoid that. (decodedAttachment is declared outside this patch;
  TODO confirm it cannot be re-read.)
* `defer ff.Close()` sits inside the `range zipReader.File` loops of both
  ApplyTemplateMSOffice and the new .zip branch, so entries are only closed
  when the function returns. In the .zip branch the defer is registered after
  io.ReadAll, so the read-error return leaves that entry unclosed. Closing
  right after the read would cover both.
* The success paths discard the result of zipWriter.Close() and return the
  err left over from zip.NewReader, which is always nil at that point (the
  loop bodies declare their own err with :=). Checking Close and returning
  nil explicitly would be clearer.
* ApplyTemplateMSOffice calls regexp.Compile for every .xml/.rels entry and
  drops its error; the pattern is constant, so one package-level
  regexp.MustCompile would do. In the pattern the `.` after `%7b%7b` is
  unescaped (matches any character) and the hex escapes are matched in lower
  case only. NOTE(review): confirm upper-case escapes never occur.
* ApplyTemplateMSOffice copies decodedAttachment into a bytes.Buffer only to
  wrap b.Bytes() again; bytes.NewReader(decodedAttachment) with
  len(decodedAttachment) is what the .zip branch already does with its slice.
* The inner switch of the .zip branch has no ".zip" case, so archives nested
  inside an archive are copied through untemplated, and all extension checks
  are case-sensitive. NOTE(review): confirm both are intended.
* ioutil.ReadAll and io.ReadAll are both used; pick one.

 models/attachment.go | 177 +++++++++++++++++++++++++++++--------------
 1 file changed, 122 insertions(+), 55 deletions(-)

diff --git a/models/attachment.go b/models/attachment.go
index f008e74d..c808b5f8 100644
--- a/models/attachment.go
+++ b/models/attachment.go
@@ -46,6 +46,93 @@ func (a Attachment) Validate() error {
 	return err
 }
 
+// ApplyTemplateMSOffice parses MS Office attachment files and applies the supplied phishing template.
+func (a *Attachment) ApplyTemplateMSOffice(ptx PhishingTemplateContext, decodedAttachment []byte) (io.Reader, error) {
+	// Most modern office formats are xml based and can be unarchived.
+	// .docm and .xlsm files are comprised of xml, and a binary blob for the macro code
+
+	// Zip archives require random access for reading, so it's hard to stream bytes. Solution seems to be to use a buffer.
+	// See https://stackoverflow.com/questions/16946978/how-to-unzip-io-readcloser
+	b := new(bytes.Buffer)
+	b.Write(decodedAttachment)
+	zipReader, err := zip.NewReader(bytes.NewReader(b.Bytes()), int64(b.Len())) // Create a new zip reader from the file
+
+	if err != nil {
+		return nil, err
+	}
+
+	newZipArchive := new(bytes.Buffer)
+	zipWriter := zip.NewWriter(newZipArchive) // For writing the new archive
+
+	// i. Read each file from the Word document archive
+	// ii. Apply the template to it
+	// iii. Add the templated content to a new zip Word archive
+	for _, zipFile := range zipReader.File {
+		ff, err := zipFile.Open()
+		if err != nil {
+			return nil, err
+		}
+		defer ff.Close()
+		contents, err := ioutil.ReadAll(ff)
+		if err != nil {
+			return nil, err
+		}
+		subFileExtension := filepath.Ext(zipFile.Name)
+		var tFile string
+		if subFileExtension == ".xml" || subFileExtension == ".rels" { // Ignore other files, e.g binary ones and images
+			// First we look for instances where Word has URL escaped our template variables. This seems to happen when inserting a remote image, converting {{.Foo}} to %7b%7b.foo%7d%7d.
+			// See https://stackoverflow.com/questions/68287630/disable-url-encoding-for-includepicture-in-microsoft-word
+			rx, _ := regexp.Compile("%7b%7b.([a-zA-Z]+)%7d%7d")
+			contents := rx.ReplaceAllFunc(contents, func(m []byte) []byte {
+				d, err := url.QueryUnescape(string(m))
+				if err != nil {
+					return m
+				}
+				return []byte(d)
+			})
+
+			// For each file apply the template.
+			tFile, err = ExecuteTemplate(string(contents), ptx)
+			if err != nil {
+				zipWriter.Close() // Don't use defer when writing files https://www.joeshaw.org/dont-defer-close-on-writable-files/
+				return nil, err
+			}
+			// Check if the subfile changed. We only need this to be set once to know in the future to check the 'parent' file
+			if tFile != string(contents) {
+				a.vanillaFile = false
+			}
+		} else {
+			tFile = string(contents) // Could move this to the declaration of tFile, but might be confusing to read
+		}
+		// Write new Word archive
+		newZipFile, err := zipWriter.Create(zipFile.Name)
+		if err != nil {
+			zipWriter.Close() // Don't use defer when writing files https://www.joeshaw.org/dont-defer-close-on-writable-files/
+			return nil, err
+		}
+		_, err = newZipFile.Write([]byte(tFile))
+		if err != nil {
+			zipWriter.Close()
+			return nil, err
+		}
+	}
+	zipWriter.Close()
+	return bytes.NewReader(newZipArchive.Bytes()), err
+}
+
+// ApplyTemplateTextFiles applies the supplied phishing template to text file attachments (txt, html, ics)
+func (a *Attachment) ApplyTemplateTextFiles(ptx PhishingTemplateContext, decodedAttachment []byte) (io.Reader, error) {
+
+	processedAttachment, err := ExecuteTemplate(string(decodedAttachment), ptx)
+	if err != nil {
+		return nil, err
+	}
+	if processedAttachment != string(decodedAttachment) {
+		a.vanillaFile = false
+	}
+	return strings.NewReader(processedAttachment), nil
+}
+
 // ApplyTemplate parses different attachment files and applies the supplied phishing template.
 func (a *Attachment) ApplyTemplate(ptx PhishingTemplateContext) (io.Reader, error) {
 
@@ -61,18 +148,18 @@ func (a *Attachment) ApplyTemplate(ptx PhishingTemplateContext) (io.Reader, erro
 	// "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
 	fileExtension := filepath.Ext(a.Name)
 
+	b, err := ioutil.ReadAll(decodedAttachment)
+	if err != nil {
+		return nil, err
+	}
+
+	// Initially assume that the attachment is vanilla and alter the state if changes are made to files
+	a.vanillaFile = true
+
 	switch fileExtension {
 
-	case ".docx", ".docm", ".pptx", ".xlsx", ".xlsm":
-		// Most modern office formats are xml based and can be unarchived.
-		// .docm and .xlsm files are comprised of xml, and a binary blob for the macro code
-
-		// Zip archives require random access for reading, so it's hard to stream bytes. Solution seems to be to use a buffer.
-		// See https://stackoverflow.com/questions/16946978/how-to-unzip-io-readcloser
-		b := new(bytes.Buffer)
-		b.ReadFrom(decodedAttachment)
-		zipReader, err := zip.NewReader(bytes.NewReader(b.Bytes()), int64(b.Len())) // Create a new zip reader from the file
-
+	case ".zip":
+		zipReader, err := zip.NewReader(bytes.NewReader(b), int64(len(b)))
 		if err != nil {
 			return nil, err
 		}
@@ -80,75 +167,55 @@ func (a *Attachment) ApplyTemplate(ptx PhishingTemplateContext) (io.Reader, erro
 		newZipArchive := new(bytes.Buffer)
 		zipWriter := zip.NewWriter(newZipArchive) // For writing the new archive
 
-		// i. Read each file from the Word document archive
-		// ii. Apply the template to it
-		// iii. Add the templated content to a new zip Word archive
-		a.vanillaFile = true
+		// Iterate over every file in the zip and apply the template depending on the filetype
 		for _, zipFile := range zipReader.File {
 			ff, err := zipFile.Open()
 			if err != nil {
 				return nil, err
 			}
-			defer ff.Close()
-			contents, err := ioutil.ReadAll(ff)
+			contents, err := io.ReadAll(ff)
 			if err != nil {
 				return nil, err
 			}
-			subFileExtension := filepath.Ext(zipFile.Name)
-			var tFile string
-			if subFileExtension == ".xml" || subFileExtension == ".rels" { // Ignore other files, e.g binary ones and images
-				// First we look for instances where Word has URL escaped our template variables. This seems to happen when inserting a remote image, converting {{.Foo}} to %7b%7b.foo%7d%7d.
-				// See https://stackoverflow.com/questions/68287630/disable-url-encoding-for-includepicture-in-microsoft-word
-				rx, _ := regexp.Compile("%7b%7b.([a-zA-Z]+)%7d%7d")
-				contents := rx.ReplaceAllFunc(contents, func(m []byte) []byte {
-					d, err := url.QueryUnescape(string(m))
-					if err != nil {
-						return m
-					}
-					return []byte(d)
-				})
+			defer ff.Close()
 
-				// For each file apply the template.
-				tFile, err = ExecuteTemplate(string(contents), ptx)
+			subfileExtension := filepath.Ext(zipFile.Name)
+			var tFile io.Reader
+			switch subfileExtension {
+
+			case ".docx", ".docm", ".pptx", ".xlsx", ".xlsm":
+				tFile, err = a.ApplyTemplateMSOffice(ptx, contents)
 				if err != nil {
-					zipWriter.Close() // Don't use defer when writing files https://www.joeshaw.org/dont-defer-close-on-writable-files/
+					zipWriter.Close()
 					return nil, err
 				}
-				// Check if the subfile changed. We only need this to be set once to know in the future to check the 'parent' file
-				if tFile != string(contents) {
-					a.vanillaFile = false
+			case ".txt", ".html", ".ics":
+				tFile, err = a.ApplyTemplateTextFiles(ptx, contents)
+				if err != nil {
+					zipWriter.Close()
+					return nil, err
 				}
-			} else {
-				tFile = string(contents) // Could move this to the declaration of tFile, but might be confusing to read
+			default:
+				tFile = bytes.NewReader(contents)
 			}
-			// Write new Word archive
-			newZipFile, err := zipWriter.Create(zipFile.Name)
+
+			// Write the possibly changed file to the new zip file
+			tmp, err := zipWriter.Create(zipFile.Name)
 			if err != nil {
-				zipWriter.Close() // Don't use defer when writing files https://www.joeshaw.org/dont-defer-close-on-writable-files/
+				zipWriter.Close()
 				return nil, err
 			}
-			_, err = newZipFile.Write([]byte(tFile))
-			if err != nil {
+			if _, err := io.Copy(tmp, tFile); err != nil {
 				zipWriter.Close()
 				return nil, err
 			}
 		}
 		zipWriter.Close()
 		return bytes.NewReader(newZipArchive.Bytes()), err
-
+	case ".docx", ".docm", ".pptx", ".xlsx", ".xlsm":
+		return a.ApplyTemplateMSOffice(ptx, b)
 	case ".txt", ".html", ".ics":
-		b, err := ioutil.ReadAll(decodedAttachment)
-		if err != nil {
-			return nil, err
-		}
-		processedAttachment, err := ExecuteTemplate(string(b), ptx)
-		if err != nil {
-			return nil, err
-		}
-		if processedAttachment == string(b) {
-			a.vanillaFile = true
-		}
-		return strings.NewReader(processedAttachment), nil
+		return a.ApplyTemplateTextFiles(ptx, b)
 	default:
 		return decodedAttachment, nil // Default is to simply return the file
 	}