diff --git a/README.md b/README.md index d775d9a..2689229 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,16 @@ # Mastodon markdown archive -Fetch a Mastodon account's posts and save them as markdown files. Post content is converted to markdown, images are downloaded and inlined, and replies are threaded. Implements most of the parameters in Mastodon's public [API to get an account's statuses](https://docs.joinmastodon.org/methods/accounts/#statuses). +Fetch a Mastodon account's posts and save them as markdown files. Post content is converted to markdown, images are downloaded and inlined, and replies are threaded. A post whose visibility is not `public` is skipped, and the post's id is used as the filename. -For the time being this formats the files in accordance to [Hugo's](https://gohugo.io) front-matter. +Implements most of the parameters in Mastodon's public [API to get an account's statuses](https://docs.joinmastodon.org/methods/accounts/#statuses). If a post has images, the post is created as a Hugo [page bundle](https://gohugo.io/content-management/page-bundles/) and images are downloaded in the corresponding post directory. I use this tool to create an [archive of my Mastodon posts](https://garrido.io/microblog/), which I then syndicate to my own site following [PESOS](https://indieweb.org/PESOS). 
-## Flags +## Usage ``` -Usage of ./mastodon-pesos: +Usage of mastodon-markdown-archive: -dist string Path to directory where files will be written (default "./posts") -exclude-reblogs @@ -23,27 +23,63 @@ Usage of ./mastodon-pesos: Fetch posts lesser than this id -min-id string Fetch posts immediately newer than this id - -persist - Persist most recent post id to /tmp/mastodon-pesos-fid + -persist-first string + Location to persist the post id of the first post returned + -persist-last string + Location to persist the post id of the last post returned -since-id string Fetch posts greater than this id + -template string + Template to use for post rendering, defaults to templates/post.tmpl -user string URL of User's Mastodon account whose toots will be fetched ``` ## Example -Here is how I use this to fetch the 15 most recent posts in my Mastodon account. It excludes replies to others, and reblogs. +Here is how I use this to archive posts from my Mastodon account. I exclude replies to others, and reblogs. -Lastly, I use `--persist` to save the most recent id to a file and use `--since-id` so that subsequent runs fetch posts only after the most recently fetched post. +I use this tool programmatically, and I certainly do not want to recreate the archive from scratch each time. + +I first use this to generate an archive up to a certain point in time. Then, I use it to archive posts made since the last archived post. + +Mastodon imposes an upper limit of 40 posts in their API. With `--persist-first` and `--persist-last` I can save cursors of the upper and lower bound of posts that were fetched. I can then use Mastodon's `max-id`, `min-id`, and `since-id` parameters to get the posts that I need, depending on each case. 
+ +### Generating an entire archive ```sh -./mastodon-pesos \ ---user https://social.coop/@ggpsv \ ---dist ./posts \ +mastodon-markdown-archive \ +--user=https://social.coop/@ggpsv \ +--dist=./posts \ --exclude-replies \ --exclude-reblogs \ ---limit=15 \ ---persist \ ---since-id=$(test -f /tmp/mastodon-pesos-fid && cat /tmp/mastodon-pesos-fid || echo "") +--persist-last=./last \ +--max-id=$(test -f ./last && cat ./last || echo "") ``` + +Calling this for the first time will fetch the most recent 40 posts. With `--persist-last`, the 40th post's id will be saved at `./last`. + +Calling this command iteratively will fetch the account's posts in reverse chronological order, 40 posts at a time. If my account had 160 posts, I'd need to call this command 4 times to create the archive. + +### Getting the latest posts + +Calling this for the first time will fetch the most recent 40 posts. With `--persist-first`, the most recent post's id will be saved at `./first`. + +Calling this command iteratively will only fetch posts that have been made since the last retrieved post. + +```sh +mastodon-markdown-archive \ +--user=https://social.coop/@ggpsv \ +--dist=./posts \ +--exclude-replies \ +--exclude-reblogs \ +--persist-first=./first \ +--since-id=$(test -f ./first && cat ./first || echo "") +``` + +## Template + +By default, this tool uses the [post.tmpl](./templates/post.tmpl) template to create the markdown file. A different template can be used by passing its path to `--template`. + +For information about variables and functions available in the template context, refer to the `Write` method in [files.go](./files/files.go). 
+ diff --git a/client/client.go b/client/client.go index 254a3a2..bd3c665 100644 --- a/client/client.go +++ b/client/client.go @@ -66,7 +66,7 @@ func New(userURL string) (Client, error) { }, nil } -func (c Client) GetPosts(filter PostsFilter) ([]Post, error) { +func (c Client) Posts(filter PostsFilter) ([]Post, error) { var posts []Post account, err := c.getAccount() @@ -152,5 +152,5 @@ func get(requestUrl string, variable interface{}) error { } func ShouldSkipPost(post Post) bool { - return post.Visibility != "unlisted" && post.Visibility != "public" + return post.Visibility != "public" } diff --git a/files/files.go b/files/files.go index e3e9761..c9adbf2 100644 --- a/files/files.go +++ b/files/files.go @@ -43,9 +43,14 @@ func New(dir string) (FileWriter, error) { }, nil } -func (f FileWriter) Write(post client.Post) error { - tpmlFilename := "templates/post.tmpl" - tmplFile, err := filepath.Abs(tpmlFilename) +func (f FileWriter) Write(post client.Post, templateFile string) error { + tmplFilename := "templates/post.tmpl" + + if templateFile != "" { + tmplFilename = templateFile + } + + tmplFile, err := filepath.Abs(tmplFilename) if err != nil { return fmt.Errorf("error resolving template absolute path: %w", err) @@ -104,7 +109,7 @@ func (f FileWriter) Write(post client.Post) error { "tomd": converter.ConvertString, } - tmpl, err := template.New(filepath.Base(tpmlFilename)).Funcs(funcs).ParseFiles(tmplFile) + tmpl, err := template.New(filepath.Base(tmplFilename)).Funcs(funcs).ParseFiles(tmplFile) context := TemplateContext{ Post: post, diff --git a/main.go b/main.go index 326812a..4498c4c 100644 --- a/main.go +++ b/main.go @@ -3,10 +3,12 @@ package main import ( "flag" "fmt" - "git.garrido.io/gabriel/mastodon-markdown-archive/client" - "git.garrido.io/gabriel/mastodon-markdown-archive/files" "log" "os" + "path/filepath" + + "git.garrido.io/gabriel/mastodon-markdown-archive/client" + "git.garrido.io/gabriel/mastodon-markdown-archive/files" ) func main() { @@ -18,7 
+20,9 @@ func main() { sinceId := flag.String("since-id", "", "Fetch posts greater than this id") maxId := flag.String("max-id", "", "Fetch posts lesser than this id") minId := flag.String("min-id", "", "Fetch posts immediately newer than this id") - persist := flag.Bool("persist", false, "Persist most recent post id to /tmp/mastodon-pesos-fid") + persistFirst := flag.String("persist-first", "", "Location to persist the post id of the first post returned") + persistLast := flag.String("persist-last", "", "Location to persist the post id of the last post returned") + templateFile := flag.String("template", "", "Template to use for post rendering, defaults to templates/post.tmpl") flag.Parse() @@ -28,7 +32,7 @@ func main() { log.Panicln(fmt.Errorf("error instantiating client: %w", err)) } - posts, err := c.GetPosts(client.PostsFilter{ + posts, err := c.Posts(client.PostsFilter{ ExcludeReplies: *excludeReplies, ExcludeReblogs: *excludeReblogs, Limit: *limit, @@ -54,16 +58,45 @@ func main() { continue } - if err := fileWriter.Write(post); err != nil { + if err := fileWriter.Write(post, *templateFile); err != nil { log.Panicln("error writing post to file: %w", err) break } } - if *persist && len(posts) > 0 { - lastPost := posts[0] + postsCount := len(posts) - fid := []byte(lastPost.Id) - os.WriteFile("/tmp/mastodon-pesos-fid", fid, 0644) + if postsCount > 0 { + if *persistFirst != "" { + firstPost := posts[0] + err := persistId(firstPost.Id, *persistFirst) + + if err != nil { + log.Panicln(err) + } + } + + if *persistLast != "" { + lastPost := posts[postsCount-1] + err := persistId(lastPost.Id, *persistLast) + + if err != nil { + log.Panicln(err) + } + } } } + +func persistId(postId string, path string) error { + persistPath, err := filepath.Abs(path) + + if err != nil { + return err + } + + if err := os.WriteFile(persistPath, []byte(postId), 0644); err != nil { + return err + } + + return nil +}