mirror of
https://github.com/adulau/mastodon-markdown-archive.git
synced 2024-11-21 17:37:06 +00:00
Support controlling media downloads
This commit is contained in:
parent
bafed4ca9b
commit
47faaf7a27
4 changed files with 75 additions and 29 deletions
21
README.md
21
README.md
|
@ -4,8 +4,6 @@ Fetch a Mastodon account's posts and save them as markdown files. Post content i
|
||||||
|
|
||||||
Implements most of the parameters in Mastodon's public [API to get an account's statuses](https://docs.joinmastodon.org/methods/accounts/#statuses).
|
Implements most of the parameters in Mastodon's public [API to get an account's statuses](https://docs.joinmastodon.org/methods/accounts/#statuses).
|
||||||
|
|
||||||
If a post has images, the post is created as a bundle of files in the manner of Hugo [page bundles](https://gohugo.io/content-management/page-bundles/), and the images are downloaded in the corresponding directory.
|
|
||||||
|
|
||||||
I use this tool to create an [archive of my Mastodon posts](https://garrido.io/microblog/), which I then syndicate to my own site following [PESOS](https://indieweb.org/PESOS).
|
I use this tool to create an [archive of my Mastodon posts](https://garrido.io/microblog/), which I then syndicate to my own site following [PESOS](https://indieweb.org/PESOS).
|
||||||
|
|
||||||
## Install
|
## Install
|
||||||
|
@ -19,6 +17,8 @@ You can clone this repo and run `go build main.go` in the repository's directory
|
||||||
Usage of mastodon-markdown-archive:
|
Usage of mastodon-markdown-archive:
|
||||||
-dist string
|
-dist string
|
||||||
Path to directory where files will be written (default "./posts")
|
Path to directory where files will be written (default "./posts")
|
||||||
|
-download-media string
|
||||||
|
Download media in a post. Omit or pass an empty string to not download media. Pass 'bundle' to download the media inline in a single directory with its original post. Pass a path to a directory to download all media there.
|
||||||
-exclude-reblogs
|
-exclude-reblogs
|
||||||
Exclude reblogs
|
Exclude reblogs
|
||||||
-exclude-replies
|
-exclude-replies
|
||||||
|
@ -45,6 +45,7 @@ Usage of mastodon-markdown-archive:
|
||||||
Thread replies for a post in a single file
|
Thread replies for a post in a single file
|
||||||
-user string
|
-user string
|
||||||
URL of Mastodon account whose toots will be fetched
|
URL of Mastodon account whose toots will be fetched
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Example
|
## Example
|
||||||
|
@ -246,4 +247,18 @@ For both the post and filename templates, the following functions and variables
|
||||||
* `toMarkdownEscaped` to convert the post's HTML content to Markdown, escaping any markdown syntax
|
* `toMarkdownEscaped` to convert the post's HTML content to Markdown, escaping any markdown syntax
|
||||||
|
|
||||||
#### Variables
|
#### Variables
|
||||||
* [Post](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#Post)
|
* [Post](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#Post)
|
||||||
|
|
||||||
|
## Post media
|
||||||
|
|
||||||
|
By default, a post's media is not downloaded. Use the `--download-media` flag with a path to download a post's media. The media's original file is downloaded, and the media's id is used as the filename.
|
||||||
|
|
||||||
|
For example, `--download-media=./images` saves any media to the `./images` directory.
|
||||||
|
|
||||||
|
Once downloaded, the media's path is available in [MediaAttachment.Path](https://pkg.go.dev/git.garrido.io/gabriel/mastodon-markdown-archive/client#MediaAttachment) as an absolute path.
|
||||||
|
|
||||||
|
Sprig's [path](https://masterminds.github.io/sprig/paths.html) functions can be used in the templates to manipulate the path as necessary. For example, the default template uses `osBase` to get the last element of the filepath.
|
||||||
|
|
||||||
|
You can use `--download-media=bundle` to save the post media in a single directory with its original post. In this case, the post's filename will be used as the directory name and the post filename will be `index.{extension}`. This is done specifically to support Hugo [page bundles](https://gohugo.io/content-management/page-bundles).
|
||||||
|
|
||||||
|
For example, `--download-media=bundle --filename='{{ .Post.CreatedAt | date "2006-01-02" }}-{{.Post.Id}}.md'` will create a `YYYY-MM-DD-<post id>/` directory, with the post saved as `YYYY-MM-DD-<post id>/index.md` and media saved as `YYYY-MM-DD-<post id>/<media id>.<media ext>`.
|
|
@ -24,6 +24,7 @@ type FileWriter struct {
|
||||||
dir string
|
dir string
|
||||||
templateFile string
|
templateFile string
|
||||||
filenameTemplate string
|
filenameTemplate string
|
||||||
|
downloadMedia string
|
||||||
}
|
}
|
||||||
|
|
||||||
type TemplateContext struct {
|
type TemplateContext struct {
|
||||||
|
@ -47,7 +48,7 @@ type PostFile struct {
|
||||||
File *os.File
|
File *os.File
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(dir string, templateFile string, filenameTemplate string) (FileWriter, error) {
|
func New(dir, templateFile, filenameTemplate, downloadMedia string) (FileWriter, error) {
|
||||||
var fileWriter FileWriter
|
var fileWriter FileWriter
|
||||||
_, err := os.Stat(dir)
|
_, err := os.Stat(dir)
|
||||||
|
|
||||||
|
@ -65,6 +66,7 @@ func New(dir string, templateFile string, filenameTemplate string) (FileWriter,
|
||||||
dir: absDir,
|
dir: absDir,
|
||||||
templateFile: templateFile,
|
templateFile: templateFile,
|
||||||
filenameTemplate: filenameTemplate,
|
filenameTemplate: filenameTemplate,
|
||||||
|
downloadMedia: downloadMedia,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -76,20 +78,34 @@ func (f *FileWriter) Write(post *client.Post) error {
|
||||||
}
|
}
|
||||||
defer postFile.File.Close()
|
defer postFile.File.Close()
|
||||||
|
|
||||||
if len(post.MediaAttachments) > 0 {
|
if f.downloadMedia != "" && len(post.AllMedia()) > 0 {
|
||||||
err = downloadAttachments(post.MediaAttachments, postFile.Dir)
|
var mediaDir string
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, descendant := range post.Descendants() {
|
if f.downloadMedia == "bundle" {
|
||||||
if len(descendant.MediaAttachments) > 0 {
|
mediaDir = postFile.Dir
|
||||||
err = downloadAttachments(descendant.MediaAttachments, postFile.Dir)
|
} else {
|
||||||
|
_, err := os.Stat(f.downloadMedia)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
os.Mkdir(f.downloadMedia, os.ModePerm)
|
||||||
|
}
|
||||||
|
mediaDir = f.downloadMedia
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(post.MediaAttachments) > 0 {
|
||||||
|
err = downloadAttachments(post.MediaAttachments, mediaDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, descendant := range post.Descendants() {
|
||||||
|
if len(descendant.MediaAttachments) > 0 {
|
||||||
|
err = downloadAttachments(descendant.MediaAttachments, mediaDir)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tmpl, err := resolveTemplate(f.templateFile)
|
tmpl, err := resolveTemplate(f.templateFile)
|
||||||
|
@ -131,9 +147,9 @@ func (f *FileWriter) formatFilename(post *client.Post) (string, error) {
|
||||||
func (f FileWriter) createFile(post *client.Post) (PostFile, error) {
|
func (f FileWriter) createFile(post *client.Post) (PostFile, error) {
|
||||||
var postFile PostFile
|
var postFile PostFile
|
||||||
|
|
||||||
shouldBundle := len(post.AllMedia()) > 0
|
|
||||||
outputFilename, err := f.formatFilename(post)
|
outputFilename, err := f.formatFilename(post)
|
||||||
extension := filepath.Ext(outputFilename)
|
extension := filepath.Ext(outputFilename)
|
||||||
|
shouldBundle := f.downloadMedia == "bundle" && len(post.AllMedia()) > 0
|
||||||
|
|
||||||
if extension == "" {
|
if extension == "" {
|
||||||
extension = ".md"
|
extension = ".md"
|
||||||
|
@ -192,20 +208,26 @@ func downloadAttachments(attachments []client.MediaAttachment, dir string) error
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
imageFilename, err := downloadAttachment(dir, media.Id, media.URL)
|
imageFile, err := downloadAttachment(dir, media.Id, media.URL)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
media.Path = imageFilename
|
absImageFile, err := filepath.Abs(imageFile.Name())
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
media.Path = absImageFile
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func downloadAttachment(dir string, id string, url string) (string, error) {
|
func downloadAttachment(dir string, id string, url string) (*os.File, error) {
|
||||||
var filename string
|
var file *os.File
|
||||||
|
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
req, _ := http.NewRequest("GET", url, nil)
|
req, _ := http.NewRequest("GET", url, nil)
|
||||||
|
@ -213,7 +235,7 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
|
||||||
res, err := client.Do(req)
|
res, err := client.Do(req)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return filename, err
|
return file, err
|
||||||
}
|
}
|
||||||
|
|
||||||
defer res.Body.Close()
|
defer res.Body.Close()
|
||||||
|
@ -222,7 +244,7 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
|
||||||
extensions, err := mime.ExtensionsByType(contentType)
|
extensions, err := mime.ExtensionsByType(contentType)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return filename, err
|
return file, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var extension string
|
var extension string
|
||||||
|
@ -236,24 +258,24 @@ func downloadAttachment(dir string, id string, url string) (string, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if extension == "" {
|
if extension == "" {
|
||||||
return filename, fmt.Errorf("could not match extension for media")
|
return file, fmt.Errorf("could not match extension for media")
|
||||||
}
|
}
|
||||||
|
|
||||||
filename = fmt.Sprintf("%s%s", id, extension)
|
filename := fmt.Sprintf("%s%s", id, extension)
|
||||||
file, err := os.Create(filepath.Join(dir, filename))
|
file, err = os.Create(filepath.Join(dir, filename))
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return filename, err
|
return file, err
|
||||||
}
|
}
|
||||||
|
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
_, err = io.Copy(file, res.Body)
|
_, err = io.Copy(file, res.Body)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return filename, err
|
return file, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return filename, nil
|
return file, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func resolveTemplate(templateFile string) (*template.Template, error) {
|
func resolveTemplate(templateFile string) (*template.Template, error) {
|
||||||
|
|
|
@ -22,7 +22,11 @@ descendants:
|
||||||
|
|
||||||
{{ range .Post.MediaAttachments }}
|
{{ range .Post.MediaAttachments }}
|
||||||
{{- if eq .Type "image" }}
|
{{- if eq .Type "image" }}
|
||||||
![{{ .Description }}]({{ .Path }})
|
{{- if .Path }}
|
||||||
|
![{{ .Description }}]({{ osBase .Path }})
|
||||||
|
{{- else }}
|
||||||
|
![{{ .Description }}]({{ .URL }})
|
||||||
|
{{- end }}
|
||||||
{{ end }}
|
{{ end }}
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
|
|
||||||
|
@ -30,7 +34,11 @@ descendants:
|
||||||
{{ .Content | toMarkdown }}
|
{{ .Content | toMarkdown }}
|
||||||
{{ range .MediaAttachments }}
|
{{ range .MediaAttachments }}
|
||||||
{{- if eq .Type "image" }}
|
{{- if eq .Type "image" }}
|
||||||
![{{ .Description }}]({{ .Path }})
|
{{- if .Path }}
|
||||||
|
![{{ .Description }}]({{ osBase .Path }})
|
||||||
|
{{- else }}
|
||||||
|
![{{ .Description }}]({{ .URL }})
|
||||||
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
3
main.go
3
main.go
|
@ -26,6 +26,7 @@ func main() {
|
||||||
threaded := flag.Bool("threaded", false, "Thread replies for a post in a single file")
|
threaded := flag.Bool("threaded", false, "Thread replies for a post in a single file")
|
||||||
filenameTemplate := flag.String("filename", "", "Template for post filename")
|
filenameTemplate := flag.String("filename", "", "Template for post filename")
|
||||||
porcelain := flag.Bool("porcelain", false, "Prints the amount of fetched posts to stdout in a parsable manner")
|
porcelain := flag.Bool("porcelain", false, "Prints the amount of fetched posts to stdout in a parsable manner")
|
||||||
|
downloadMedia := flag.String("download-media", "", "Download media in a post. Omit or pass an empty string to not download media. Pass 'bundle' to download the media inline in a single directory with its original post. Pass a path to a directory to download all media there.")
|
||||||
|
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
|
@ -42,7 +43,7 @@ func main() {
|
||||||
log.Panicln(err)
|
log.Panicln(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
fileWriter, err := files.New(*dist, *templateFile, *filenameTemplate)
|
fileWriter, err := files.New(*dist, *templateFile, *filenameTemplate, *downloadMedia)
|
||||||
posts := c.Posts()
|
posts := c.Posts()
|
||||||
postsCount := len(posts)
|
postsCount := len(posts)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue