Support controlling media downloads

This commit is contained in:
Gabriel Garrido 2024-05-19 19:05:39 +02:00
parent bafed4ca9b
commit 47faaf7a27
4 changed files with 75 additions and 29 deletions

View file

@ -4,8 +4,6 @@ Fetch a Mastodon account's posts and save them as markdown files. Post content i
Implements most of the parameters in Mastodon's public [API to get an account's statuses](https://docs.joinmastodon.org/methods/accounts/#statuses). Implements most of the parameters in Mastodon's public [API to get an account's statuses](https://docs.joinmastodon.org/methods/accounts/#statuses).
If a post has images, the post is created as a bundle of files in the manner of Hugo [page bundles](https://gohugo.io/content-management/page-bundles/), and the images are downloaded in the corresponding directory.
I use this tool to create an [archive of my Mastodon posts](https://garrido.io/microblog/), which I then syndicate to my own site following [PESOS](https://indieweb.org/PESOS). I use this tool to create an [archive of my Mastodon posts](https://garrido.io/microblog/), which I then syndicate to my own site following [PESOS](https://indieweb.org/PESOS).
## Install ## Install
@ -19,6 +17,8 @@ You can clone this repo and run `go build main.go` in the repository's directory
Usage of mastodon-markdown-archive: Usage of mastodon-markdown-archive:
-dist string -dist string
Path to directory where files will be written (default "./posts") Path to directory where files will be written (default "./posts")
-download-media string
Download media in a post. Omit or pass an empty string to not download media. Pass 'bundle' to download the media inline in a single directory with its original post. Pass a path to a directory to download all media there.
-exclude-reblogs -exclude-reblogs
Exclude reblogs Exclude reblogs
-exclude-replies -exclude-replies
@ -45,6 +45,7 @@ Usage of mastodon-markdown-archive:
Thread replies for a post in a single file Thread replies for a post in a single file
-user string -user string
URL of Mastodon account whose toots will be fetched URL of Mastodon account whose toots will be fetched
``` ```
## Example ## Example
@ -247,3 +248,17 @@ For both the post and filename templates, the following functions and variables
#### Variables #### Variables
* [Post](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#Post) * [Post](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#Post)
## Post media
By default, a post's media is not downloaded. Use the `--download-media` flag with a path to download a post's media. The original media file is downloaded, and the media's id (plus an extension matching its content type) is used as the filename.
For example, `--download-media=./images` saves any media to the `./images` directory.
Once downloaded, the media's path is available in [MediaAttachment.Path](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#MediaAttachment) as an absolute path.
Sprig's [path](https://masterminds.github.io/sprig/paths.html) functions can be used in the templates to manipulate the path as necessary. For example, the default template uses `osBase` to get the last element of the filepath.
You can use `--download-media=bundle` to save the post media in a single directory with its original post. In this case, the post's filename will be used as the directory name and the post filename will be `index.{extension}`. This is done specifically to support Hugo [page bundles](https://gohugo.io/content-management/page-bundles).
For example, `--download-media=bundle --filename='{{ .Post.CreatedAt | date "2006-01-02" }}-{{.Post.Id}}.md'` will create a `YYYY-MM-DD-<post id>/` directory, with the post saved as `YYYY-MM-DD-<post id>/index.md` and media saved as `YYYY-MM-DD-<post id>/<media id>.<media ext>`.

View file

@ -24,6 +24,7 @@ type FileWriter struct {
dir string dir string
templateFile string templateFile string
filenameTemplate string filenameTemplate string
downloadMedia string
} }
type TemplateContext struct { type TemplateContext struct {
@ -47,7 +48,7 @@ type PostFile struct {
File *os.File File *os.File
} }
func New(dir string, templateFile string, filenameTemplate string) (FileWriter, error) { func New(dir, templateFile, filenameTemplate, downloadMedia string) (FileWriter, error) {
var fileWriter FileWriter var fileWriter FileWriter
_, err := os.Stat(dir) _, err := os.Stat(dir)
@ -65,6 +66,7 @@ func New(dir string, templateFile string, filenameTemplate string) (FileWriter,
dir: absDir, dir: absDir,
templateFile: templateFile, templateFile: templateFile,
filenameTemplate: filenameTemplate, filenameTemplate: filenameTemplate,
downloadMedia: downloadMedia,
}, nil }, nil
} }
@ -76,20 +78,34 @@ func (f *FileWriter) Write(post *client.Post) error {
} }
defer postFile.File.Close() defer postFile.File.Close()
if len(post.MediaAttachments) > 0 { if f.downloadMedia != "" && len(post.AllMedia()) > 0 {
err = downloadAttachments(post.MediaAttachments, postFile.Dir) var mediaDir string
if err != nil {
return err
}
}
for _, descendant := range post.Descendants() { if f.downloadMedia == "bundle" {
if len(descendant.MediaAttachments) > 0 { mediaDir = postFile.Dir
err = downloadAttachments(descendant.MediaAttachments, postFile.Dir) } else {
_, err := os.Stat(f.downloadMedia)
if os.IsNotExist(err) {
os.Mkdir(f.downloadMedia, os.ModePerm)
}
mediaDir = f.downloadMedia
}
if len(post.MediaAttachments) > 0 {
err = downloadAttachments(post.MediaAttachments, mediaDir)
if err != nil { if err != nil {
return err return err
} }
} }
for _, descendant := range post.Descendants() {
if len(descendant.MediaAttachments) > 0 {
err = downloadAttachments(descendant.MediaAttachments, mediaDir)
if err != nil {
return err
}
}
}
} }
tmpl, err := resolveTemplate(f.templateFile) tmpl, err := resolveTemplate(f.templateFile)
@ -131,9 +147,9 @@ func (f *FileWriter) formatFilename(post *client.Post) (string, error) {
func (f FileWriter) createFile(post *client.Post) (PostFile, error) { func (f FileWriter) createFile(post *client.Post) (PostFile, error) {
var postFile PostFile var postFile PostFile
shouldBundle := len(post.AllMedia()) > 0
outputFilename, err := f.formatFilename(post) outputFilename, err := f.formatFilename(post)
extension := filepath.Ext(outputFilename) extension := filepath.Ext(outputFilename)
shouldBundle := f.downloadMedia == "bundle" && len(post.AllMedia()) > 0
if extension == "" { if extension == "" {
extension = ".md" extension = ".md"
@ -192,20 +208,26 @@ func downloadAttachments(attachments []client.MediaAttachment, dir string) error
continue continue
} }
imageFilename, err := downloadAttachment(dir, media.Id, media.URL) imageFile, err := downloadAttachment(dir, media.Id, media.URL)
if err != nil { if err != nil {
return err return err
} }
media.Path = imageFilename absImageFile, err := filepath.Abs(imageFile.Name())
if err != nil {
return err
}
media.Path = absImageFile
} }
return nil return nil
} }
func downloadAttachment(dir string, id string, url string) (string, error) { func downloadAttachment(dir string, id string, url string) (*os.File, error) {
var filename string var file *os.File
client := &http.Client{} client := &http.Client{}
req, _ := http.NewRequest("GET", url, nil) req, _ := http.NewRequest("GET", url, nil)
@ -213,7 +235,7 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
res, err := client.Do(req) res, err := client.Do(req)
if err != nil { if err != nil {
return filename, err return file, err
} }
defer res.Body.Close() defer res.Body.Close()
@ -222,7 +244,7 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
extensions, err := mime.ExtensionsByType(contentType) extensions, err := mime.ExtensionsByType(contentType)
if err != nil { if err != nil {
return filename, err return file, err
} }
var extension string var extension string
@ -236,24 +258,24 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
} }
if extension == "" { if extension == "" {
return filename, fmt.Errorf("could not match extension for media") return file, fmt.Errorf("could not match extension for media")
} }
filename = fmt.Sprintf("%s%s", id, extension) filename := fmt.Sprintf("%s%s", id, extension)
file, err := os.Create(filepath.Join(dir, filename)) file, err = os.Create(filepath.Join(dir, filename))
if err != nil { if err != nil {
return filename, err return file, err
} }
defer file.Close() defer file.Close()
_, err = io.Copy(file, res.Body) _, err = io.Copy(file, res.Body)
if err != nil { if err != nil {
return filename, err return file, err
} }
return filename, nil return file, nil
} }
func resolveTemplate(templateFile string) (*template.Template, error) { func resolveTemplate(templateFile string) (*template.Template, error) {

View file

@ -22,7 +22,11 @@ descendants:
{{ range .Post.MediaAttachments }} {{ range .Post.MediaAttachments }}
{{- if eq .Type "image" }} {{- if eq .Type "image" }}
![{{ .Description }}]({{ .Path }}) {{- if .Path }}
![{{ .Description }}]({{ osBase .Path }})
{{- else }}
![{{ .Description }}]({{ .URL }})
{{- end }}
{{ end }} {{ end }}
{{- end -}} {{- end -}}
@ -30,7 +34,11 @@ descendants:
{{ .Content | toMarkdown }} {{ .Content | toMarkdown }}
{{ range .MediaAttachments }} {{ range .MediaAttachments }}
{{- if eq .Type "image" }} {{- if eq .Type "image" }}
![{{ .Description }}]({{ .Path }}) {{- if .Path }}
![{{ .Description }}]({{ osBase .Path }})
{{- else }}
![{{ .Description }}]({{ .URL }})
{{- end }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- end }} {{- end }}

View file

@ -26,6 +26,7 @@ func main() {
threaded := flag.Bool("threaded", false, "Thread replies for a post in a single file") threaded := flag.Bool("threaded", false, "Thread replies for a post in a single file")
filenameTemplate := flag.String("filename", "", "Template for post filename") filenameTemplate := flag.String("filename", "", "Template for post filename")
porcelain := flag.Bool("porcelain", false, "Prints the amount of fetched posts to stdout in a parsable manner") porcelain := flag.Bool("porcelain", false, "Prints the amount of fetched posts to stdout in a parsable manner")
downloadMedia := flag.String("download-media", "", "Download media in a post. Omit or pass an empty string to not download media. Pass 'bundle' to download the media inline in a single directory with its original post. Pass a path to a directory to download all media there.")
flag.Parse() flag.Parse()
@ -42,7 +43,7 @@ func main() {
log.Panicln(err) log.Panicln(err)
} }
fileWriter, err := files.New(*dist, *templateFile, *filenameTemplate) fileWriter, err := files.New(*dist, *templateFile, *filenameTemplate, *downloadMedia)
posts := c.Posts() posts := c.Posts()
postsCount := len(posts) postsCount := len(posts)