From 47faaf7a270587e00abe45e42156ac291a23b08c Mon Sep 17 00:00:00 2001 From: Gabriel Garrido Date: Sun, 19 May 2024 19:05:39 +0200 Subject: [PATCH] Support controlling media downloads --- README.md | 21 ++++++++++-- files/files.go | 68 ++++++++++++++++++++++++++------------- files/templates/post.tmpl | 12 +++++-- main.go | 3 +- 4 files changed, 75 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index c9e66ca..a473140 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,6 @@ Fetch a Mastodon account's posts and save them as markdown files. Post content i Implements most of the parameters in Mastodon's public [API to get an account's statuses](https://docs.joinmastodon.org/methods/accounts/#statuses). -If a post has images, the post is created as a bundle of files in the manner of Hugo [page bundles](https://gohugo.io/content-management/page-bundles/), and the images are downloaded in the corresponding directory. - I use this tool to create an [archive of my Mastodon posts](https://garrido.io/microblog/), which I then syndicate to my own site following [PESOS](https://indieweb.org/PESOS). ## Install @@ -19,6 +17,8 @@ You can clone this repo and run `go build main.go` in the repository's directory Usage of mastodon-markdown-archive: -dist string Path to directory where files will be written (default "./posts") + -download-media string + Download media in a post. Omit or pass an empty string to not download media. Pass 'bundle' to download the media inline in a single directory with its original post. Pass a path to a directory to download all media there. 
-exclude-reblogs Exclude reblogs -exclude-replies @@ -45,6 +45,7 @@ Usage of mastodon-markdown-archive: Thread replies for a post in a single file -user string URL of Mastodon account whose toots will be fetched + ``` ## Example @@ -246,4 +247,18 @@ For both the post and filename templates, the following functions and variables * `toMarkdownEscaped` to convert the post's HTML content to Markdown, escaping any markdown syntax #### Variables -* [Post](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#Post) \ No newline at end of file +* [Post](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#Post) + +## Post media + +By default, a post's media is not downloaded. Use the `--download-media` flag with a path to download a post's media. The media's original file is downloaded, and the media's id is used as the filename. + +For example, `--download-media=./images` saves any media to the `./images` directory. + +Once downloaded, the media's path is available in [MediaAttachment.Path](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#MediaAttachment) as an absolute path. + +Sprig's [path](https://masterminds.github.io/sprig/paths.html) functions can be used in the templates to manipulate the path as necessary. For example, the default template uses `osBase` to get the last element of the filepath. + +You can use `--download-media=bundle` to save the post media in a single directory with its original post. In this case, the post's filename will be used as the directory name and the post filename will be `index.{extension}`. This is done specifically to support Hugo [page bundles](https://gohugo.io/content-management/page-bundles). + +For example, `--download-media=bundle --filename='{{ .Post.CreatedAt | date "2006-01-02" }}-{{.Post.Id}}.md'` will create a `YYYY-MM-DD-{post id}/` directory, with the post saved as `YYYY-MM-DD-{post id}/index.md` and media saved as `YYYY-MM-DD-{post id}/{media id}.{extension}`. 
\ No newline at end of file diff --git a/files/files.go b/files/files.go index 1a22ad1..95502d1 100644 --- a/files/files.go +++ b/files/files.go @@ -24,6 +24,7 @@ type FileWriter struct { dir string templateFile string filenameTemplate string + downloadMedia string } type TemplateContext struct { @@ -47,7 +48,7 @@ type PostFile struct { File *os.File } -func New(dir string, templateFile string, filenameTemplate string) (FileWriter, error) { +func New(dir, templateFile, filenameTemplate, downloadMedia string) (FileWriter, error) { var fileWriter FileWriter _, err := os.Stat(dir) @@ -65,6 +66,7 @@ func New(dir string, templateFile string, filenameTemplate string) (FileWriter, dir: absDir, templateFile: templateFile, filenameTemplate: filenameTemplate, + downloadMedia: downloadMedia, }, nil } @@ -76,20 +78,34 @@ func (f *FileWriter) Write(post *client.Post) error { } defer postFile.File.Close() - if len(post.MediaAttachments) > 0 { - err = downloadAttachments(post.MediaAttachments, postFile.Dir) - if err != nil { - return err - } - } + if f.downloadMedia != "" && len(post.AllMedia()) > 0 { + var mediaDir string - for _, descendant := range post.Descendants() { - if len(descendant.MediaAttachments) > 0 { - err = downloadAttachments(descendant.MediaAttachments, postFile.Dir) + if f.downloadMedia == "bundle" { + mediaDir = postFile.Dir + } else { + _, err := os.Stat(f.downloadMedia) + if os.IsNotExist(err) { + os.Mkdir(f.downloadMedia, os.ModePerm) + } + mediaDir = f.downloadMedia + } + + if len(post.MediaAttachments) > 0 { + err = downloadAttachments(post.MediaAttachments, mediaDir) if err != nil { return err } } + + for _, descendant := range post.Descendants() { + if len(descendant.MediaAttachments) > 0 { + err = downloadAttachments(descendant.MediaAttachments, mediaDir) + if err != nil { + return err + } + } + } } tmpl, err := resolveTemplate(f.templateFile) @@ -131,9 +147,9 @@ func (f *FileWriter) formatFilename(post *client.Post) (string, error) { func (f 
FileWriter) createFile(post *client.Post) (PostFile, error) { var postFile PostFile - shouldBundle := len(post.AllMedia()) > 0 outputFilename, err := f.formatFilename(post) extension := filepath.Ext(outputFilename) + shouldBundle := f.downloadMedia == "bundle" && len(post.AllMedia()) > 0 if extension == "" { extension = ".md" @@ -192,20 +208,26 @@ func downloadAttachments(attachments []client.MediaAttachment, dir string) error continue } - imageFilename, err := downloadAttachment(dir, media.Id, media.URL) + imageFile, err := downloadAttachment(dir, media.Id, media.URL) if err != nil { return err } - media.Path = imageFilename + absImageFile, err := filepath.Abs(imageFile.Name()) + + if err != nil { + return err + } + + media.Path = absImageFile } return nil } -func downloadAttachment(dir string, id string, url string) (string, error) { - var filename string +func downloadAttachment(dir string, id string, url string) (*os.File, error) { + var file *os.File client := &http.Client{} req, _ := http.NewRequest("GET", url, nil) @@ -213,7 +235,7 @@ func downloadAttachment(dir string, id string, url string) (string, error) { res, err := client.Do(req) if err != nil { - return filename, err + return file, err } defer res.Body.Close() @@ -222,7 +244,7 @@ func downloadAttachment(dir string, id string, url string) (string, error) { extensions, err := mime.ExtensionsByType(contentType) if err != nil { - return filename, err + return file, err } var extension string @@ -236,24 +258,24 @@ func downloadAttachment(dir string, id string, url string) (string, error) { } if extension == "" { - return filename, fmt.Errorf("could not match extension for media") + return file, fmt.Errorf("could not match extension for media") } - filename = fmt.Sprintf("%s%s", id, extension) - file, err := os.Create(filepath.Join(dir, filename)) + filename := fmt.Sprintf("%s%s", id, extension) + file, err = os.Create(filepath.Join(dir, filename)) if err != nil { - return filename, err + return file, err 
} defer file.Close() _, err = io.Copy(file, res.Body) if err != nil { - return filename, err + return file, err } - return filename, nil + return file, nil } func resolveTemplate(templateFile string) (*template.Template, error) { diff --git a/files/templates/post.tmpl b/files/templates/post.tmpl index 4e6640f..93f2592 100644 --- a/files/templates/post.tmpl +++ b/files/templates/post.tmpl @@ -22,7 +22,11 @@ descendants: {{ range .Post.MediaAttachments }} {{- if eq .Type "image" }} -![{{ .Description }}]({{ .Path }}) +{{- if .Path }} +![{{ .Description }}]({{ osBase .Path }}) +{{- else }} +![{{ .Description }}]({{ .URL }}) +{{- end }} {{ end }} {{- end -}} @@ -30,7 +34,11 @@ descendants: {{ .Content | toMarkdown }} {{ range .MediaAttachments }} {{- if eq .Type "image" }} -![{{ .Description }}]({{ .Path }}) +{{- if .Path }} +![{{ .Description }}]({{ osBase .Path }}) +{{- else }} +![{{ .Description }}]({{ .URL }}) +{{- end }} {{- end }} {{- end }} {{- end }} diff --git a/main.go b/main.go index 19eea4b..533f1c2 100644 --- a/main.go +++ b/main.go @@ -26,6 +26,7 @@ func main() { threaded := flag.Bool("threaded", false, "Thread replies for a post in a single file") filenameTemplate := flag.String("filename", "", "Template for post filename") porcelain := flag.Bool("porcelain", false, "Prints the amount of fetched posts to stdout in a parsable manner") + downloadMedia := flag.String("download-media", "", "Download media in a post. Omit or pass an empty string to not download media. Pass 'bundle' to download the media inline in a single directory with its original post. Pass a path to a directory to download all media there.") flag.Parse() @@ -42,7 +43,7 @@ func main() { log.Panicln(err) } - fileWriter, err := files.New(*dist, *templateFile, *filenameTemplate) + fileWriter, err := files.New(*dist, *templateFile, *filenameTemplate, *downloadMedia) posts := c.Posts() postsCount := len(posts)