// NOTE(review): unified diff touching client/client.go, files/files.go,
// files/templates/post.tmpl and main.go. Everything ahead of the first
// "diff --git" header is reviewer commentary only.
//
// The line breaks inside the hunks below appear to have been collapsed into
// spaces, so this copy is presumably not applicable as-is -- TODO confirm
// against the original patch.
//
// Blocking issue: every file section adds unresolved merge-conflict markers as
// "+" lines ("<<<<<<< Updated upstream", "=======", ">>>>>>> Stashed changes").
// In the three Go files the opening marker is added above the package clause
// and a second full copy of the file follows "======="; a Go file in that
// state does not parse. Resolve the conflicts before committing.
//
// Issues visible in the added ("Stashed changes") code:
// - client.get: the error from io.ReadAll is assigned but never checked; the
//   following `if err := json.Unmarshal(...)` declares a new err.
// - client.populateIdMap / generateReplies / flushReplies use value receivers.
//   generateReplies appends to c.orphans on its own copy of Client, so the
//   orphan loop in New ranges over the still-nil slice. It also passes
//   &post.descendants of the range variable, so appended replies are not
//   stored back into c.posts.
// - client.generateReplies looks up c.postIdMap[post.Id]; New calls
//   populateIdMap first, which fills that map for every post in c.posts, so
//   the orphan branch is not taken. Presumably post.InReplyToId was intended
//   -- confirm.
// - client.getPosts formats ".../statuses/%s" with a query string that starts
//   with "?", producing ".../statuses/?..."; check whether the trailing slash
//   is intended.
// - files.FileWriter.Write: the error from resolveTemplate is overwritten by
//   the result of tmpl.Execute before it is checked.
// - files.New and createFile ignore the error returned by os.Mkdir;
//   downloadAttachment discards the error from http.NewRequest.
// - main: the error from files.New is never checked, and log.Panicln is given
//   a "%w" format string, which it prints literally; the break after it is
//   unreachable.
diff --git a/client/client.go b/client/client.go index 5f7c7c7..49f5f39 100644 --- a/client/client.go +++ b/client/client.go @@ -1,3 +1,4 @@ +<<<<<<< Updated upstream package client import ( @@ -215,3 +216,313 @@ func get(requestUrl string, variable interface{}) error { func ShouldSkipPost(post Post) bool { return post.Visibility != "public" } +======= +package client + +import ( + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "net/url" + "strconv" + "strings" + "time" +) + +type Client struct { + handle string + baseURL string + filters PostsFilter + account Account + posts []Post + replies map[string]Post + orphans []string + postIdMap map[string]Post +} + +type Account struct { + Id string `json:"id"` + Username string `json:"username"` + Acct string `json:"acct"` + DisplayName string `json:"display_name"` + Locked bool `json:"locked"` + Bot bool `json:"bot"` + Discoverable bool `json:"discoverable"` + Group bool `json:"group"` + CreatedAt time.Time `json:"created_at"` + Note string `json:"note"` + URL string `json:"url"` + URI string `json:"uri"` + Avatar string `json:"avatar"` + AvatarStatic string `json:"avatar_static"` + Header string `json:"header"` + HeaderStatic string `json:"header_static"` + FollowersCount int `json:"followers_count"` + FollowingCount int `json:"following_count"` + StatusesCount int `json:"statuses_count"` + LastStatusAt string `json:"last_status_at"` +} + +type MediaAttachment struct { + Type string `json:"type"` + URL string `json:"url"` + Description string `json:"description"` + Id string `json:"id"` + Path string +} + +type Application struct { + Name string `json:"name"` + Website string `json:"website"` +} + +type Tag struct { + Name string `json:"name"` + URL string `json:"url"` +} + +type Post struct { + CreatedAt time.Time `json:"created_at"` + Id string `json:"id"` + Visibility string `json:"visibility"` + InReplyToId string `json:"in_reply_to_id"` + InReplyToAccountId string `json:"in_reply_to_account_id"` + 
Sensitive bool `json:"sensitive"` + SpoilerText string `json:"spoiler_text"` + Language string `json:"language"` + URI string `json:"uri"` + URL string `json:"url"` + Application Application `json:"application"` + Content string `json:"content"` + MediaAttachments []MediaAttachment `json:"media_attachments"` + RepliesCount int `json:"replies_count"` + ReblogsCount int `json:"reblogs_count"` + FavoritesCount int `json:"favourites_count"` + Pinned bool `json:"pinned"` + Tags []Tag `json:"tags"` + Favourited bool `json:"favourited"` + Reblogged bool `json:"reblogged"` + Muted bool `json:"muted"` + Bookmarked bool `json:"bookmarked"` + Account Account `json:"account"` + descendants []Post +} + +type PostsFilter struct { + ExcludeReplies bool + ExcludeReblogs bool + Limit int + SinceId string + MinId string + MaxId string +} + +func New(userURL string, filters PostsFilter, threaded bool) (Client, error) { + var client Client + parsedURL, err := url.Parse(userURL) + + if err != nil { + return client, fmt.Errorf("error parsing user url: %w", err) + } + + baseURL := fmt.Sprintf("%s://%s", parsedURL.Scheme, parsedURL.Host) + acc := strings.TrimPrefix(parsedURL.Path, "/") + handle := strings.TrimPrefix(acc, "@") + + account, err := getAccount(baseURL, handle) + + if err != nil { + return client, err + } + + posts, err := getPosts(baseURL, account.Id, filters) + + if err != nil { + return client, err + } + + var orphans []string + client = Client{ + baseURL: baseURL, + handle: handle, + filters: filters, + account: account, + posts: posts, + postIdMap: make(map[string]Post), + replies: make(map[string]Post), + orphans: orphans, + } + + client.populateIdMap() + + if threaded { + client.generateReplies() + } + + for _, orphan := range client.orphans { + log.Println(fmt.Sprintf("Orphan: %s", orphan)) + } + + return client, nil +} + +func (c Client) Account() Account { + return c.account +} + +func (c Client) Posts() []Post { + return c.posts +} + +func (p Post) ShouldSkip() bool 
{ + return p.Visibility != "public" +} + +func (p Post) Descendants() []Post { + return p.descendants +} + +func (c Client) populateIdMap() { + for _, post := range c.posts { + c.postIdMap[post.Id] = post + } +} + +func (c Client) flushReplies(post Post, descendants *[]Post) { + if reply, ok := c.replies[post.Id]; ok { + *descendants = append(*descendants, reply) + c.flushReplies(reply, descendants) + } +} + +func (c Client) generateReplies() { + for _, post := range c.posts { + if post.InReplyToId == "" { + c.flushReplies(post, &post.descendants) + continue + } + + if _, ok := c.postIdMap[post.Id]; ok { + log.Println(fmt.Sprintf("Adding %s to replies of %s", post.Id, post.InReplyToId)) + c.replies[post.InReplyToId] = post + } else { + log.Println("Found orphan") + c.orphans = append(c.orphans, post.Id) + } + } +} + +func getPosts(baseURL string, accountId string, filters PostsFilter) ([]Post, error) { + var posts []Post + + queryValues := url.Values{} + + if filters.ExcludeReplies { + queryValues.Add("exclude_replies", strconv.Itoa(1)) + } + + if filters.ExcludeReblogs { + queryValues.Add("exclude_reblogs", strconv.Itoa(1)) + } + + if filters.SinceId != "" { + queryValues.Add("since_id", filters.SinceId) + } + + if filters.MaxId != "" { + queryValues.Add("max_id", filters.MaxId) + } + + if filters.MinId != "" { + queryValues.Add("min_id", filters.MinId) + } + + queryValues.Add("limit", strconv.Itoa(filters.Limit)) + + query := fmt.Sprintf("?%s", queryValues.Encode()) + + postsUrl := fmt.Sprintf( + "%s/api/v1/accounts/%s/statuses/%s", + baseURL, + accountId, + query, + ) + + log.Println(fmt.Sprintf("Fetching posts from %s", postsUrl)) + + if err := get(postsUrl, &posts); err != nil { + return posts, err + } + + return posts, nil +} + +func getAccount(baseURL string, handle string) (Account, error) { + var account Account + lookupUrl := fmt.Sprintf( + "%s/api/v1/accounts/lookup?acct=%s", + baseURL, + handle, + ) + + err := get(lookupUrl, &account) + + if err != nil 
{ + return account, err + } + + return account, nil +} + +func (p Post) AllTags() []Tag { + var tags []Tag + + for _, tag := range p.Tags { + tags = append(tags, tag) + } + + for _, descendant := range p.descendants { + for _, tag := range descendant.Tags { + tags = append(tags, tag) + } + } + + return tags +} + +func (p Post) AllMedia() []MediaAttachment { + var media []MediaAttachment + + for _, item := range p.MediaAttachments { + media = append(media, item) + } + + for _, descendant := range p.descendants { + for _, item := range descendant.MediaAttachments { + media = append(media, item) + } + } + + return media +} + +func get(requestUrl string, variable interface{}) error { + res, err := http.Get(requestUrl) + + if err != nil { + return err + } + + defer res.Body.Close() + + body, err := io.ReadAll(res.Body) + + if err := json.Unmarshal(body, variable); err != nil { + return err + } + + return nil +} +>>>>>>> Stashed changes diff --git a/files/files.go b/files/files.go index 9b61434..2421cb2 100644 --- a/files/files.go +++ b/files/files.go @@ -1,3 +1,4 @@ +<<<<<<< Updated upstream package files import ( @@ -193,3 +194,235 @@ func resolveTemplate(templateFile string) (*template.Template, error) { return tmpl, nil } +======= +package files + +import ( + "embed" + "fmt" + "io" + "mime" + "net/http" + "os" + "path/filepath" + "text/template" + + "git.garrido.io/gabriel/mastodon-markdown-archive/client" + md "github.com/JohannesKaufmann/html-to-markdown" +) + +//go:embed templates/post.tmpl +var templates embed.FS + +type FileWriter struct { + dir string +} + +type TemplateContext struct { + Post client.Post +} + +type PostFile struct { + Dir string + Name string + File *os.File +} + +func New(dir string) (FileWriter, error) { + var fileWriter FileWriter + _, err := os.Stat(dir) + + if os.IsNotExist(err) { + os.Mkdir(dir, os.ModePerm) + } + + absDir, err := filepath.Abs(dir) + + if err != nil { + return fileWriter, err + } + + return FileWriter{ + dir: absDir, + 
}, nil +} + +func (f FileWriter) Write(post client.Post, templateFile string) error { + hasMedia := len(post.AllMedia()) > 0 + postFile, err := f.createFile(post, hasMedia) + + if err != nil { + return err + } + defer postFile.File.Close() + + if len(post.MediaAttachments) > 0 { + err = downloadAttachments(post.MediaAttachments, postFile.Dir) + if err != nil { + return err + } + } + + for _, descendant := range post.Descendants() { + if len(descendant.MediaAttachments) > 0 { + err = downloadAttachments(descendant.MediaAttachments, postFile.Dir) + if err != nil { + return err + } + } + } + + tmpl, err := resolveTemplate(templateFile) + context := TemplateContext{ + Post: post, + } + + err = tmpl.Execute(postFile.File, context) + + if err != nil { + return err + } + + return nil +} + +func (f FileWriter) createFile(post client.Post, shouldBundle bool) (PostFile, error) { + var postFile PostFile + + if shouldBundle { + dir := filepath.Join(f.dir, post.Id) + + _, err := os.Stat(dir) + if os.IsNotExist(err) { + os.Mkdir(dir, os.ModePerm) + } + + name := filepath.Join(dir, "index.md") + file, err := os.Create(name) + + if err != nil { + return postFile, err + } + + postFile = PostFile{ + Name: name, + Dir: dir, + File: file, + } + + return postFile, nil + } + + name := filepath.Join(f.dir, fmt.Sprintf("%s.md", post.Id)) + file, err := os.Create(name) + + if err != nil { + return postFile, err + } + + postFile = PostFile{ + Name: name, + Dir: f.dir, + File: file, + } + + return postFile, nil +} + +func downloadAttachments(attachments []client.MediaAttachment, dir string) error { + for i := 0; i < len(attachments); i++ { + media := &attachments[i] + if media.Type != "image" { + continue + } + + imageFilename, err := downloadAttachment(dir, media.Id, media.URL) + + if err != nil { + return err + } + + media.Path = imageFilename + } + + return nil +} + +func downloadAttachment(dir string, id string, url string) (string, error) { + var filename string + + client := 
&http.Client{} + req, _ := http.NewRequest("GET", url, nil) + req.Header.Set("Accept", "image/*") + res, err := client.Do(req) + + if err != nil { + return filename, err + } + + defer res.Body.Close() + + contentType := res.Header.Get("Content-Type") + extensions, err := mime.ExtensionsByType(contentType) + + if err != nil { + return filename, err + } + + var extension string + urlExtension := filepath.Ext(url) + + for _, i := range extensions { + if i == urlExtension { + extension = i + break + } + } + + if extension == "" { + return filename, fmt.Errorf("could not match extension for media") + } + + filename = fmt.Sprintf("%s%s", id, extension) + file, err := os.Create(filepath.Join(dir, filename)) + + if err != nil { + return filename, err + } + + defer file.Close() + _, err = io.Copy(file, res.Body) + + if err != nil { + return filename, err + } + + return filename, nil +} + +func resolveTemplate(templateFile string) (*template.Template, error) { + converter := md.NewConverter("", true, nil) + + funcs := template.FuncMap{ + "tomd": converter.ConvertString, + } + + if templateFile == "" { + tmpl, err := template.New("post.tmpl").Funcs(funcs).ParseFS(templates, "templates/*.tmpl") + + if err != nil { + return tmpl, err + } + + return tmpl, nil + } + + tmpl, err := template.New(filepath.Base(templateFile)).Funcs(funcs).ParseGlob(templateFile) + + if err != nil { + return tmpl, err + } + + return tmpl, nil +} +>>>>>>> Stashed changes diff --git a/files/templates/post.tmpl b/files/templates/post.tmpl index 15f01ab..85caa09 100644 --- a/files/templates/post.tmpl +++ b/files/templates/post.tmpl @@ -5,27 +5,33 @@ in_reply_to: {{ .Post.InReplyToId }} {{- end }} post_uri: {{ .Post.URI }} post_id: {{ .Post.Id }} -{{- if len .Tags }} +{{- if len .Post.AllTags }} tags: -{{- range .Tags }} +{{- range .Post.AllTags }} - {{ .Name }} {{- end }} {{- end }} -{{- if len .Descendants }} +{{- if len .Post.Descendants }} descendants: -{{- range .Descendants }} +{{- range 
.Post.Descendants }} - {{ .Id }} {{- end }} {{- end }} --- {{ .Post.Content | tomd }} +<<<<<<< Updated upstream {{- range .Post.MediaAttachments }} {{ if eq .Type "image" }} +======= +{{ range .Post.MediaAttachments }} +{{- if eq .Type "image" }} +>>>>>>> Stashed changes ![{{ .Description }}]({{ .Path }}) {{ end }} {{- end -}} +<<<<<<< Updated upstream {{- range .Descendants }} {{ .Content | tomd -}} {{- range .MediaAttachments }} @@ -34,3 +40,13 @@ descendants: {{ end }} {{- end -}} {{- end -}} +======= +{{ range .Post.Descendants }} +{{ .Content | tomd }} +{{ range .MediaAttachments }} +{{- if eq .Type "image" }} +![{{ .Description }}]({{ .Path }}) +{{- end }} +{{- end }} +{{- end }} +>>>>>>> Stashed changes diff --git a/main.go b/main.go index 34b9358..844e4e2 100644 --- a/main.go +++ b/main.go @@ -1,3 +1,4 @@ +<<<<<<< Updated upstream package main import ( @@ -101,3 +102,98 @@ func persistId(postId string, path string) error { return nil } +======= +package main + +import ( + "flag" + "fmt" + "log" + "os" + "path/filepath" + + "git.garrido.io/gabriel/mastodon-markdown-archive/client" + "git.garrido.io/gabriel/mastodon-markdown-archive/files" +) + +func main() { + dist := flag.String("dist", "./posts", "Path to directory where files will be written") + user := flag.String("user", "", "URL of User's Mastodon account whose toots will be fetched") + excludeReplies := flag.Bool("exclude-replies", false, "Whether or not exclude replies to other users") + excludeReblogs := flag.Bool("exclude-reblogs", false, "Whether or not to exclude reblogs") + limit := flag.Int("limit", 40, "Maximum number of posts to fetch") + sinceId := flag.String("since-id", "", "Fetch posts greater than this id") + maxId := flag.String("max-id", "", "Fetch posts lesser than this id") + minId := flag.String("min-id", "", "Fetch posts immediately newer than this id") + persistFirst := flag.String("persist-first", "", "Location to persist the post id of the first post returned") + persistLast := 
flag.String("persist-last", "", "Location to persist the post id of the last post returned") + templateFile := flag.String("template", "", "Template to use for post rendering, if passed") + threaded := flag.Bool("threaded", true, "Thread replies for a post in a single file") + + flag.Parse() + + c, err := client.New(*user, client.PostsFilter{ + ExcludeReplies: *excludeReplies, + ExcludeReblogs: *excludeReblogs, + Limit: *limit, + SinceId: *sinceId, + MaxId: *maxId, + MinId: *minId, + }, *threaded) + + if err != nil { + log.Panicln(err) + } + + fileWriter, err := files.New(*dist) + posts := c.Posts() + postsCount := len(posts) + + log.Println(fmt.Sprintf("Fetched %d posts", postsCount)) + + for _, post := range posts { + if post.ShouldSkip() { + continue + } + + if err := fileWriter.Write(post, *templateFile); err != nil { + log.Panicln("error writing post to file: %w", err) + break + } + } + + if postsCount > 0 { + if *persistFirst != "" { + firstPost := posts[0] + err := persistId(firstPost.Id, *persistFirst) + + if err != nil { + log.Panicln(err) + } + } + + if *persistLast != "" { + lastPost := posts[postsCount-1] + err := persistId(lastPost.Id, *persistLast) + + if err != nil { + log.Panicln(err) + } + } + } +} + +func persistId(postId string, path string) error { + persistPath, err := filepath.Abs(path) + + if err != nil { + return err + } + + if err := os.WriteFile(persistPath, []byte(postId), 0644); err != nil { + return err + } + + return nil +} +>>>>>>> Stashed changes