Support controlling media downloads

This commit is contained in:
Gabriel Garrido 2024-05-19 19:05:39 +02:00
parent bafed4ca9b
commit 47faaf7a27
4 changed files with 75 additions and 29 deletions

View file

@ -4,8 +4,6 @@ Fetch a Mastodon account's posts and save them as markdown files. Post content i
Implements most of the parameters in Mastodon's public [API to get an account's statuses](https://docs.joinmastodon.org/methods/accounts/#statuses).
If a post has images, the post is created as a bundle of files in the manner of Hugo [page bundles](https://gohugo.io/content-management/page-bundles/), and the images are downloaded in the corresponding directory.
I use this tool to create an [archive of my Mastodon posts](https://garrido.io/microblog/), which I then syndicate to my own site following [PESOS](https://indieweb.org/PESOS).
## Install
@ -19,6 +17,8 @@ You can clone this repo and run `go build main.go` in the repository's directory
Usage of mastodon-markdown-archive:
-dist string
Path to directory where files will be written (default "./posts")
-download-media string
Download media in a post. Omit or pass an empty string to not download media. Pass 'bundle' to download the media inline in a single directory with its original post. Pass a path to a directory to download all media there.
-exclude-reblogs
Exclude reblogs
-exclude-replies
@ -45,6 +45,7 @@ Usage of mastodon-markdown-archive:
Thread replies for a post in a single file
-user string
URL of Mastodon account whose toots will be fetched
```
## Example
@ -246,4 +247,18 @@ For both the post and filename templates, the following functions and variables
* `toMarkdownEscaped` to convert the post's HTML content to Markdown, escaping any markdown syntax
#### Variables
* [Post](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#Post)
* [Post](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#Post)
## Post media
By default, a post's media is not downloaded. Use the `--download-media` flag with a path to download a post's media. Each media attachment's original file is downloaded, and the media's id (plus a file extension matching its content type) is used as the filename.
For example, `--download-media=./images` saves any media to the `./images` directory.
Once downloaded, the media's path is available in [MediaAttachment.Path](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#MediaAttachment) as an absolute path.
Sprig's [path](https://masterminds.github.io/sprig/paths.html) functions can be used in the templates to manipulate the path as necessary. For example, the default template uses `osBase` to get the last element of the filepath.
You can use `--download-media=bundle` to save the post media in a single directory with its original post. In this case, the post's filename will be used as the directory name and the post filename will be `index.{extension}`. This is done specifically to support Hugo [page bundles](https://gohugo.io/content-management/page-bundles).
For example, `--download-media=bundle --filename='{{ .Post.CreatedAt | date "2006-01-02" }}-{{.Post.Id}}.md'` will create a `YYYY-MM-DD-<post id>/` directory, with the post saved as `YYYY-MM-DD-<post id>/index.md` and media saved as `YYYY-MM-DD-<post id>/<media id>.<media ext>`.

View file

@ -24,6 +24,7 @@ type FileWriter struct {
dir string
templateFile string
filenameTemplate string
downloadMedia string
}
type TemplateContext struct {
@ -47,7 +48,7 @@ type PostFile struct {
File *os.File
}
func New(dir string, templateFile string, filenameTemplate string) (FileWriter, error) {
func New(dir, templateFile, filenameTemplate, downloadMedia string) (FileWriter, error) {
var fileWriter FileWriter
_, err := os.Stat(dir)
@ -65,6 +66,7 @@ func New(dir string, templateFile string, filenameTemplate string) (FileWriter,
dir: absDir,
templateFile: templateFile,
filenameTemplate: filenameTemplate,
downloadMedia: downloadMedia,
}, nil
}
@ -76,20 +78,34 @@ func (f *FileWriter) Write(post *client.Post) error {
}
defer postFile.File.Close()
if len(post.MediaAttachments) > 0 {
err = downloadAttachments(post.MediaAttachments, postFile.Dir)
if err != nil {
return err
}
}
if f.downloadMedia != "" && len(post.AllMedia()) > 0 {
var mediaDir string
for _, descendant := range post.Descendants() {
if len(descendant.MediaAttachments) > 0 {
err = downloadAttachments(descendant.MediaAttachments, postFile.Dir)
if f.downloadMedia == "bundle" {
mediaDir = postFile.Dir
} else {
_, err := os.Stat(f.downloadMedia)
if os.IsNotExist(err) {
os.Mkdir(f.downloadMedia, os.ModePerm)
}
mediaDir = f.downloadMedia
}
if len(post.MediaAttachments) > 0 {
err = downloadAttachments(post.MediaAttachments, mediaDir)
if err != nil {
return err
}
}
for _, descendant := range post.Descendants() {
if len(descendant.MediaAttachments) > 0 {
err = downloadAttachments(descendant.MediaAttachments, mediaDir)
if err != nil {
return err
}
}
}
}
tmpl, err := resolveTemplate(f.templateFile)
@ -131,9 +147,9 @@ func (f *FileWriter) formatFilename(post *client.Post) (string, error) {
func (f FileWriter) createFile(post *client.Post) (PostFile, error) {
var postFile PostFile
shouldBundle := len(post.AllMedia()) > 0
outputFilename, err := f.formatFilename(post)
extension := filepath.Ext(outputFilename)
shouldBundle := f.downloadMedia == "bundle" && len(post.AllMedia()) > 0
if extension == "" {
extension = ".md"
@ -192,20 +208,26 @@ func downloadAttachments(attachments []client.MediaAttachment, dir string) error
continue
}
imageFilename, err := downloadAttachment(dir, media.Id, media.URL)
imageFile, err := downloadAttachment(dir, media.Id, media.URL)
if err != nil {
return err
}
media.Path = imageFilename
absImageFile, err := filepath.Abs(imageFile.Name())
if err != nil {
return err
}
media.Path = absImageFile
}
return nil
}
func downloadAttachment(dir string, id string, url string) (string, error) {
var filename string
func downloadAttachment(dir string, id string, url string) (*os.File, error) {
var file *os.File
client := &http.Client{}
req, _ := http.NewRequest("GET", url, nil)
@ -213,7 +235,7 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
res, err := client.Do(req)
if err != nil {
return filename, err
return file, err
}
defer res.Body.Close()
@ -222,7 +244,7 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
extensions, err := mime.ExtensionsByType(contentType)
if err != nil {
return filename, err
return file, err
}
var extension string
@ -236,24 +258,24 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
}
if extension == "" {
return filename, fmt.Errorf("could not match extension for media")
return file, fmt.Errorf("could not match extension for media")
}
filename = fmt.Sprintf("%s%s", id, extension)
file, err := os.Create(filepath.Join(dir, filename))
filename := fmt.Sprintf("%s%s", id, extension)
file, err = os.Create(filepath.Join(dir, filename))
if err != nil {
return filename, err
return file, err
}
defer file.Close()
_, err = io.Copy(file, res.Body)
if err != nil {
return filename, err
return file, err
}
return filename, nil
return file, nil
}
func resolveTemplate(templateFile string) (*template.Template, error) {

View file

@ -22,7 +22,11 @@ descendants:
{{ range .Post.MediaAttachments }}
{{- if eq .Type "image" }}
![{{ .Description }}]({{ .Path }})
{{- if .Path }}
![{{ .Description }}]({{ osBase .Path }})
{{- else }}
![{{ .Description }}]({{ .URL }})
{{- end }}
{{ end }}
{{- end -}}
@ -30,7 +34,11 @@ descendants:
{{ .Content | toMarkdown }}
{{ range .MediaAttachments }}
{{- if eq .Type "image" }}
![{{ .Description }}]({{ .Path }})
{{- if .Path }}
![{{ .Description }}]({{ osBase .Path }})
{{- else }}
![{{ .Description }}]({{ .URL }})
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View file

@ -26,6 +26,7 @@ func main() {
threaded := flag.Bool("threaded", false, "Thread replies for a post in a single file")
filenameTemplate := flag.String("filename", "", "Template for post filename")
porcelain := flag.Bool("porcelain", false, "Prints the amount of fetched posts to stdout in a parsable manner")
downloadMedia := flag.String("download-media", "", "Download media in a post. Omit or pass an empty string to not download media. Pass 'bundle' to download the media inline in a single directory with its original post. Pass a path to a directory to download all media there.")
flag.Parse()
@ -42,7 +43,7 @@ func main() {
log.Panicln(err)
}
fileWriter, err := files.New(*dist, *templateFile, *filenameTemplate)
fileWriter, err := files.New(*dist, *templateFile, *filenameTemplate, *downloadMedia)
posts := c.Posts()
postsCount := len(posts)