mirror of
https://github.com/adulau/mastodon-markdown-archive.git
synced 2024-11-21 17:37:06 +00:00
Support controlling media downloads
This commit is contained in:
parent
bafed4ca9b
commit
47faaf7a27
4 changed files with 75 additions and 29 deletions
21
README.md
21
README.md
|
@ -4,8 +4,6 @@ Fetch a Mastodon account's posts and save them as markdown files. Post content i
|
|||
|
||||
Implements most of the parameters in Mastodon's public [API to get an account's statuses](https://docs.joinmastodon.org/methods/accounts/#statuses).
|
||||
|
||||
If a post has images, the post is created as a bundle of files in the manner of Hugo [page bundles](https://gohugo.io/content-management/page-bundles/), and the images are downloaded in the corresponding directory.
|
||||
|
||||
I use this tool to create an [archive of my Mastodon posts](https://garrido.io/microblog/), which I then syndicate to my own site following [PESOS](https://indieweb.org/PESOS).
|
||||
|
||||
## Install
|
||||
|
@ -19,6 +17,8 @@ You can clone this repo and run `go build main.go` in the repository's directory
|
|||
Usage of mastodon-markdown-archive:
|
||||
-dist string
|
||||
Path to directory where files will be written (default "./posts")
|
||||
-download-media string
|
||||
Download media in a post. Omit or pass an empty string to not download media. Pass 'bundle' to download the media inline in a single directory with its original post. Pass a path to a directory to download all media there.
|
||||
-exclude-reblogs
|
||||
Exclude reblogs
|
||||
-exclude-replies
|
||||
|
@ -45,6 +45,7 @@ Usage of mastodon-markdown-archive:
|
|||
Thread replies for a post in a single file
|
||||
-user string
|
||||
URL of Mastodon account whose toots will be fetched
|
||||
|
||||
```
|
||||
|
||||
## Example
|
||||
|
@ -246,4 +247,18 @@ For both the post and filename templates, the following functions and variables
|
|||
* `toMarkdownEscaped` to convert the post's HTML content to Markdown, escaping any markdown syntax
|
||||
|
||||
#### Variables
|
||||
* [Post](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#Post)
|
||||
* [Post](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#Post)
|
||||
|
||||
## Post media
|
||||
|
||||
By default, a post's media is not downloaded. Use the `--download-media` flag with a path to download a post's media. The original media file is downloaded, and the media's id, plus a file extension, is used as the filename.
|
||||
|
||||
For example, `--download-media=./images` saves any media to the `./images` directory.
|
||||
|
||||
Once downloaded, the media's path is available in [MediaAttachment.Path](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#MediaAttachment) as an absolute path.
|
||||
|
||||
Sprig's [path](https://masterminds.github.io/sprig/paths.html) functions can be used in the templates to manipulate the path as necessary. For example, the default template uses `osBase` to get the last element of the filepath.
|
||||
|
||||
You can use `--download-media=bundle` to save the post media in a single directory with its original post. In this case, the post's filename will be used as the directory name and the post filename will be `index.{extension}`. This is done specifically to support Hugo [page bundles](https://gohugo.io/content-management/page-bundles).
|
||||
|
||||
For example, `--download-media=bundle --filename='{{ .Post.CreatedAt | date "2006-01-02" }}-{{.Post.Id}}.md'` will create a `YYYY-MM-DD-<post id>/` directory, with the post saved as `YYYY-MM-DD-<post id>/index.md` and media saved as `YYYY-MM-DD-<post id>/<media id>.<media ext>`.
|
|
@ -24,6 +24,7 @@ type FileWriter struct {
|
|||
dir string
|
||||
templateFile string
|
||||
filenameTemplate string
|
||||
downloadMedia string
|
||||
}
|
||||
|
||||
type TemplateContext struct {
|
||||
|
@ -47,7 +48,7 @@ type PostFile struct {
|
|||
File *os.File
|
||||
}
|
||||
|
||||
func New(dir string, templateFile string, filenameTemplate string) (FileWriter, error) {
|
||||
func New(dir, templateFile, filenameTemplate, downloadMedia string) (FileWriter, error) {
|
||||
var fileWriter FileWriter
|
||||
_, err := os.Stat(dir)
|
||||
|
||||
|
@ -65,6 +66,7 @@ func New(dir string, templateFile string, filenameTemplate string) (FileWriter,
|
|||
dir: absDir,
|
||||
templateFile: templateFile,
|
||||
filenameTemplate: filenameTemplate,
|
||||
downloadMedia: downloadMedia,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -76,20 +78,34 @@ func (f *FileWriter) Write(post *client.Post) error {
|
|||
}
|
||||
defer postFile.File.Close()
|
||||
|
||||
if len(post.MediaAttachments) > 0 {
|
||||
err = downloadAttachments(post.MediaAttachments, postFile.Dir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if f.downloadMedia != "" && len(post.AllMedia()) > 0 {
|
||||
var mediaDir string
|
||||
|
||||
for _, descendant := range post.Descendants() {
|
||||
if len(descendant.MediaAttachments) > 0 {
|
||||
err = downloadAttachments(descendant.MediaAttachments, postFile.Dir)
|
||||
if f.downloadMedia == "bundle" {
|
||||
mediaDir = postFile.Dir
|
||||
} else {
|
||||
_, err := os.Stat(f.downloadMedia)
|
||||
if os.IsNotExist(err) {
|
||||
os.Mkdir(f.downloadMedia, os.ModePerm)
|
||||
}
|
||||
mediaDir = f.downloadMedia
|
||||
}
|
||||
|
||||
if len(post.MediaAttachments) > 0 {
|
||||
err = downloadAttachments(post.MediaAttachments, mediaDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, descendant := range post.Descendants() {
|
||||
if len(descendant.MediaAttachments) > 0 {
|
||||
err = downloadAttachments(descendant.MediaAttachments, mediaDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tmpl, err := resolveTemplate(f.templateFile)
|
||||
|
@ -131,9 +147,9 @@ func (f *FileWriter) formatFilename(post *client.Post) (string, error) {
|
|||
func (f FileWriter) createFile(post *client.Post) (PostFile, error) {
|
||||
var postFile PostFile
|
||||
|
||||
shouldBundle := len(post.AllMedia()) > 0
|
||||
outputFilename, err := f.formatFilename(post)
|
||||
extension := filepath.Ext(outputFilename)
|
||||
shouldBundle := f.downloadMedia == "bundle" && len(post.AllMedia()) > 0
|
||||
|
||||
if extension == "" {
|
||||
extension = ".md"
|
||||
|
@ -192,20 +208,26 @@ func downloadAttachments(attachments []client.MediaAttachment, dir string) error
|
|||
continue
|
||||
}
|
||||
|
||||
imageFilename, err := downloadAttachment(dir, media.Id, media.URL)
|
||||
imageFile, err := downloadAttachment(dir, media.Id, media.URL)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
media.Path = imageFilename
|
||||
absImageFile, err := filepath.Abs(imageFile.Name())
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
media.Path = absImageFile
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func downloadAttachment(dir string, id string, url string) (string, error) {
|
||||
var filename string
|
||||
func downloadAttachment(dir string, id string, url string) (*os.File, error) {
|
||||
var file *os.File
|
||||
|
||||
client := &http.Client{}
|
||||
req, _ := http.NewRequest("GET", url, nil)
|
||||
|
@ -213,7 +235,7 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
|
|||
res, err := client.Do(req)
|
||||
|
||||
if err != nil {
|
||||
return filename, err
|
||||
return file, err
|
||||
}
|
||||
|
||||
defer res.Body.Close()
|
||||
|
@ -222,7 +244,7 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
|
|||
extensions, err := mime.ExtensionsByType(contentType)
|
||||
|
||||
if err != nil {
|
||||
return filename, err
|
||||
return file, err
|
||||
}
|
||||
|
||||
var extension string
|
||||
|
@ -236,24 +258,24 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
|
|||
}
|
||||
|
||||
if extension == "" {
|
||||
return filename, fmt.Errorf("could not match extension for media")
|
||||
return file, fmt.Errorf("could not match extension for media")
|
||||
}
|
||||
|
||||
filename = fmt.Sprintf("%s%s", id, extension)
|
||||
file, err := os.Create(filepath.Join(dir, filename))
|
||||
filename := fmt.Sprintf("%s%s", id, extension)
|
||||
file, err = os.Create(filepath.Join(dir, filename))
|
||||
|
||||
if err != nil {
|
||||
return filename, err
|
||||
return file, err
|
||||
}
|
||||
|
||||
defer file.Close()
|
||||
_, err = io.Copy(file, res.Body)
|
||||
|
||||
if err != nil {
|
||||
return filename, err
|
||||
return file, err
|
||||
}
|
||||
|
||||
return filename, nil
|
||||
return file, nil
|
||||
}
|
||||
|
||||
func resolveTemplate(templateFile string) (*template.Template, error) {
|
||||
|
|
|
@ -22,7 +22,11 @@ descendants:
|
|||
|
||||
{{ range .Post.MediaAttachments }}
|
||||
{{- if eq .Type "image" }}
|
||||
![{{ .Description }}]({{ .Path }})
|
||||
{{- if .Path }}
|
||||
![{{ .Description }}]({{ osBase .Path }})
|
||||
{{- else }}
|
||||
![{{ .Description }}]({{ .URL }})
|
||||
{{- end }}
|
||||
{{ end }}
|
||||
{{- end -}}
|
||||
|
||||
|
@ -30,7 +34,11 @@ descendants:
|
|||
{{ .Content | toMarkdown }}
|
||||
{{ range .MediaAttachments }}
|
||||
{{- if eq .Type "image" }}
|
||||
![{{ .Description }}]({{ .Path }})
|
||||
{{- if .Path }}
|
||||
![{{ .Description }}]({{ osBase .Path }})
|
||||
{{- else }}
|
||||
![{{ .Description }}]({{ .URL }})
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
|
3
main.go
3
main.go
|
@ -26,6 +26,7 @@ func main() {
|
|||
threaded := flag.Bool("threaded", false, "Thread replies for a post in a single file")
|
||||
filenameTemplate := flag.String("filename", "", "Template for post filename")
|
||||
porcelain := flag.Bool("porcelain", false, "Prints the amount of fetched posts to stdout in a parsable manner")
|
||||
downloadMedia := flag.String("download-media", "", "Download media in a post. Omit or pass an empty string to not download media. Pass 'bundle' to download the media inline in a single directory with its original post. Pass a path to a directory to download all media there.")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
|
@ -42,7 +43,7 @@ func main() {
|
|||
log.Panicln(err)
|
||||
}
|
||||
|
||||
fileWriter, err := files.New(*dist, *templateFile, *filenameTemplate)
|
||||
fileWriter, err := files.New(*dist, *templateFile, *filenameTemplate, *downloadMedia)
|
||||
posts := c.Posts()
|
||||
postsCount := len(posts)
|
||||
|
||||
|
|
Loading…
Reference in a new issue