Various updates

* Rename arguments for clarity
* Add another argument to persist the last post fetched
* Update README
* Support specifying path to cursor file
This commit is contained in:
Gabriel Garrido 2024-05-11 18:03:35 +02:00
parent f38364896a
commit bee2adcd50
4 changed files with 103 additions and 29 deletions

View file

@ -1,16 +1,16 @@
# Mastodon markdown archive # Mastodon markdown archive
Fetch a Mastodon account's posts and save them as markdown files. Post content is converted to markdown, images are downloaded and inlined, and replies are threaded. Implements most of the parameters in Mastodon's public [API to get an account's statuses](https://docs.joinmastodon.org/methods/accounts/#statuses). Fetch a Mastodon account's posts and save them as markdown files. Post content is converted to markdown, images are downloaded and inlined, and replies are threaded. A post whose visibility is not `public` is skipped, and the post's id is used as the filename.
For the time being this formats the files in accordance to [Hugo's](https://gohugo.io) front-matter. Implements most of the parameters in Mastodon's public [API to get an account's statuses](https://docs.joinmastodon.org/methods/accounts/#statuses).
If a post has images, the post is created as a Hugo [page bundle](https://gohugo.io/content-management/page-bundles/) and images are downloaded in the corresponding post directory. If a post has images, the post is created as a Hugo [page bundle](https://gohugo.io/content-management/page-bundles/) and images are downloaded in the corresponding post directory.
I use this tool to create an [archive of my Mastodon posts](https://garrido.io/microblog/), which I then syndicate to my own site following [PESOS](https://indieweb.org/PESOS). I use this tool to create an [archive of my Mastodon posts](https://garrido.io/microblog/), which I then syndicate to my own site following [PESOS](https://indieweb.org/PESOS).
## Flags ## Usage
``` ```
Usage of ./mastodon-pesos: Usage of mastodon-markdown-archive:
-dist string -dist string
Path to directory where files will be written (default "./posts") Path to directory where files will be written (default "./posts")
-exclude-reblogs -exclude-reblogs
@ -23,27 +23,63 @@ Usage of ./mastodon-pesos:
Fetch posts lesser than this id Fetch posts lesser than this id
-min-id string -min-id string
Fetch posts immediately newer than this id Fetch posts immediately newer than this id
-persist -persist-first string
Persist most recent post id to /tmp/mastodon-pesos-fid Location to persist the post id of the first post returned
-persist-last string
Location to persist the post id of the last post returned
-since-id string -since-id string
Fetch posts greater than this id Fetch posts greater than this id
-template string
Template to use for post rendering, defaults to templates/post.tmpl
-user string -user string
URL of User's Mastodon account whose toots will be fetched URL of User's Mastodon account whose toots will be fetched
``` ```
## Example ## Example
Here is how I use this to fetch the 15 most recent posts in my Mastodon account. It excludes replies to others, and reblogs. Here is how I use this to archive posts from my Mastodon account. I exclude replies to others, and reblogs.
Lastly, I use `--persist` to save the most recent id to a file and use `--since-id` so that subsequent runs fetch posts only after the most recently fetched post. I use this tool programmatically, and I certainly do not want to recreate the archive from scratch each time.
I first use this to generate an archive up to a certain point in time. Then, I use it to archive posts made since the last archived post.
Mastodon imposes an upper limit of 40 posts in their API. With `--persist-first` and `--persist-last` I can save cursors of the upper and lower bound of posts that were fetched. I can then use Mastodon's `max-id`, `min-id`, and `since-id` parameters to get the posts that I need, depending on each case.
### Generating an entire archive
```sh ```sh
./mastodon-pesos \ mastodon-markdown-archive \
--user https://social.coop/@ggpsv \ --user=https://social.coop/@ggpsv \
--dist ./posts \ --dist=./posts \
--exclude-replies \ --exclude-replies \
--exclude-reblogs \ --exclude-reblogs \
--limit=15 \ --persist-last=./last \
--persist \ --max-id=$(test -f ./last && cat ./last || echo "")
--since-id=$(test -f /tmp/mastodon-pesos-fid && cat /tmp/mastodon-pesos-fid || echo "")
``` ```
Calling this for the first time will fetch the most recent 40 posts. With `--persist-last`, the 40th post's id will be saved at `./last`.
Calling this command iteratively will fetch the account's posts in reverse chronological order, 40 posts at a time. If my account had 160 posts, I'd need to call this command 4 times to create the archive.
### Getting the latest posts
Calling this for the first time will fetch the most recent 40 posts. With `--persist-first`, the most recent post's id will be saved at `./first`.
Calling this command iteratively will only fetch posts that have been made since the last retrieved post.
```sh
mastodon-markdown-archive \
--user=https://social.coop/@ggpsv \
--dist=./posts \
--exclude-replies \
--exclude-reblogs \
--persist-first=./first \
--since-id=$(test -f ./first && cat ./first || echo "")
```
## Template
By default, this tool uses the [post.tmpl](./templates/post.tmpl) template to create the markdown file. A different template can be used by passing its path to `--template`.
For information about variables and functions available in the template context, refer to the `Write` method in [`files.go`](./files/files.go).

View file

@ -66,7 +66,7 @@ func New(userURL string) (Client, error) {
}, nil }, nil
} }
func (c Client) GetPosts(filter PostsFilter) ([]Post, error) { func (c Client) Posts(filter PostsFilter) ([]Post, error) {
var posts []Post var posts []Post
account, err := c.getAccount() account, err := c.getAccount()
@ -152,5 +152,5 @@ func get(requestUrl string, variable interface{}) error {
} }
func ShouldSkipPost(post Post) bool { func ShouldSkipPost(post Post) bool {
return post.Visibility != "unlisted" && post.Visibility != "public" return post.Visibility != "public"
} }

View file

@ -43,9 +43,14 @@ func New(dir string) (FileWriter, error) {
}, nil }, nil
} }
func (f FileWriter) Write(post client.Post) error { func (f FileWriter) Write(post client.Post, templateFile string) error {
tpmlFilename := "templates/post.tmpl" tmplFilename := "templates/post.tmpl"
tmplFile, err := filepath.Abs(tpmlFilename)
if templateFile != "" {
tmplFilename = templateFile
}
tmplFile, err := filepath.Abs(tmplFilename)
if err != nil { if err != nil {
return fmt.Errorf("error resolving template absolute path: %w", err) return fmt.Errorf("error resolving template absolute path: %w", err)
@ -104,7 +109,7 @@ func (f FileWriter) Write(post client.Post) error {
"tomd": converter.ConvertString, "tomd": converter.ConvertString,
} }
tmpl, err := template.New(filepath.Base(tpmlFilename)).Funcs(funcs).ParseFiles(tmplFile) tmpl, err := template.New(filepath.Base(tmplFilename)).Funcs(funcs).ParseFiles(tmplFile)
context := TemplateContext{ context := TemplateContext{
Post: post, Post: post,

51
main.go
View file

@ -3,10 +3,12 @@ package main
import ( import (
"flag" "flag"
"fmt" "fmt"
"git.garrido.io/gabriel/mastodon-markdown-archive/client"
"git.garrido.io/gabriel/mastodon-markdown-archive/files"
"log" "log"
"os" "os"
"path/filepath"
"git.garrido.io/gabriel/mastodon-markdown-archive/client"
"git.garrido.io/gabriel/mastodon-markdown-archive/files"
) )
func main() { func main() {
@ -18,7 +20,9 @@ func main() {
sinceId := flag.String("since-id", "", "Fetch posts greater than this id") sinceId := flag.String("since-id", "", "Fetch posts greater than this id")
maxId := flag.String("max-id", "", "Fetch posts lesser than this id") maxId := flag.String("max-id", "", "Fetch posts lesser than this id")
minId := flag.String("min-id", "", "Fetch posts immediately newer than this id") minId := flag.String("min-id", "", "Fetch posts immediately newer than this id")
persist := flag.Bool("persist", false, "Persist most recent post id to /tmp/mastodon-pesos-fid") persistFirst := flag.String("persist-first", "", "Location to persist the post id of the first post returned")
persistLast := flag.String("persist-last", "", "Location to persist the post id of the last post returned")
templateFile := flag.String("template", "", "Template to use for post rendering, defaults to templates/post.tmpl")
flag.Parse() flag.Parse()
@ -28,7 +32,7 @@ func main() {
log.Panicln(fmt.Errorf("error instantiating client: %w", err)) log.Panicln(fmt.Errorf("error instantiating client: %w", err))
} }
posts, err := c.GetPosts(client.PostsFilter{ posts, err := c.Posts(client.PostsFilter{
ExcludeReplies: *excludeReplies, ExcludeReplies: *excludeReplies,
ExcludeReblogs: *excludeReblogs, ExcludeReblogs: *excludeReblogs,
Limit: *limit, Limit: *limit,
@ -54,16 +58,45 @@ func main() {
continue continue
} }
if err := fileWriter.Write(post); err != nil { if err := fileWriter.Write(post, *templateFile); err != nil {
log.Panicln("error writing post to file: %w", err) log.Panicln("error writing post to file: %w", err)
break break
} }
} }
if *persist && len(posts) > 0 { postsCount := len(posts)
lastPost := posts[0]
fid := []byte(lastPost.Id) if postsCount > 0 {
os.WriteFile("/tmp/mastodon-pesos-fid", fid, 0644) if *persistFirst != "" {
firstPost := posts[0]
err := persistId(firstPost.Id, *persistFirst)
if err != nil {
log.Panicln(err)
}
}
if *persistLast != "" {
lastPost := posts[postsCount-1]
err := persistId(lastPost.Id, *persistLast)
if err != nil {
log.Panicln(err)
}
}
} }
} }
// persistId writes postId to the file at path, creating the file if it does
// not exist and replacing its contents otherwise. The path is resolved to an
// absolute path before writing so that any error reports the exact location
// that was attempted.
//
// It returns a wrapped error if the path cannot be resolved or the file
// cannot be written; the underlying error remains reachable via errors.Is
// and errors.As.
func persistId(postId string, path string) error {
	persistPath, err := filepath.Abs(path)
	if err != nil {
		return fmt.Errorf("resolving persist path %q: %w", path, err)
	}

	// 0644: the cursor file is plain data, readable by other tools/users.
	if err := os.WriteFile(persistPath, []byte(postId), 0644); err != nil {
		return fmt.Errorf("persisting post id to %q: %w", persistPath, err)
	}

	return nil
}