WIP: move threading to Client

This commit is contained in:
Gabriel Garrido 2024-05-13 10:47:26 +02:00
parent 5af659241f
commit 437db12b8d
4 changed files with 660 additions and 4 deletions

View file

@ -1,3 +1,4 @@
<<<<<<< Updated upstream
package client package client
import ( import (
@ -215,3 +216,313 @@ func get(requestUrl string, variable interface{}) error {
func ShouldSkipPost(post Post) bool { func ShouldSkipPost(post Post) bool {
return post.Visibility != "public" return post.Visibility != "public"
} }
=======
package client
import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/url"
"strconv"
"strings"
"time"
)
// Client holds everything gathered for one account: the resolved account,
// the posts fetched for it, and the lookup tables used to thread replies.
type Client struct {
// handle is the user URL path with the leading "/" and "@" trimmed.
handle string
// baseURL is "<scheme>://<host>" of the user URL.
baseURL string
// filters is the PostsFilter that was passed to New.
filters PostsFilter
// account is the result of the account lookup for handle.
account Account
// posts are the posts returned by the statuses request.
posts []Post
// replies maps a parent post id (InReplyToId) to the post replying to it.
replies map[string]Post
// orphans collects ids of posts recorded as orphans while threading.
orphans []string
// postIdMap indexes the fetched posts by their own Id.
postIdMap map[string]Post
}
// Account is the JSON shape decoded from the account lookup endpoint
// (see getAccount); it is also embedded in every Post.
type Account struct {
Id string `json:"id"`
Username string `json:"username"`
Acct string `json:"acct"`
DisplayName string `json:"display_name"`
Locked bool `json:"locked"`
Bot bool `json:"bot"`
Discoverable bool `json:"discoverable"`
Group bool `json:"group"`
CreatedAt time.Time `json:"created_at"`
Note string `json:"note"`
URL string `json:"url"`
URI string `json:"uri"`
Avatar string `json:"avatar"`
AvatarStatic string `json:"avatar_static"`
Header string `json:"header"`
HeaderStatic string `json:"header_static"`
FollowersCount int `json:"followers_count"`
FollowingCount int `json:"following_count"`
StatusesCount int `json:"statuses_count"`
// LastStatusAt is kept as the raw string from the response rather than parsed.
LastStatusAt string `json:"last_status_at"`
}
// MediaAttachment describes one media item attached to a Post.
type MediaAttachment struct {
Type string `json:"type"`
URL string `json:"url"`
Description string `json:"description"`
Id string `json:"id"`
// Path has no JSON tag; it is assigned by the files package with the
// file name of the downloaded attachment.
Path string
}
// Application is the "application" object decoded from a Post.
type Application struct {
Name string `json:"name"`
Website string `json:"website"`
}
// Tag is one entry of the "tags" array decoded from a Post.
type Tag struct {
Name string `json:"name"`
URL string `json:"url"`
}
// Post is a single status decoded from the account statuses endpoint
// (see getPosts).
type Post struct {
CreatedAt time.Time `json:"created_at"`
Id string `json:"id"`
Visibility string `json:"visibility"`
// InReplyToId is empty for posts that are not replies; generateReplies
// uses it to decide whether a post starts a thread.
InReplyToId string `json:"in_reply_to_id"`
InReplyToAccountId string `json:"in_reply_to_account_id"`
Sensitive bool `json:"sensitive"`
SpoilerText string `json:"spoiler_text"`
Language string `json:"language"`
URI string `json:"uri"`
URL string `json:"url"`
Application Application `json:"application"`
Content string `json:"content"`
MediaAttachments []MediaAttachment `json:"media_attachments"`
RepliesCount int `json:"replies_count"`
ReblogsCount int `json:"reblogs_count"`
FavoritesCount int `json:"favourites_count"`
Pinned bool `json:"pinned"`
Tags []Tag `json:"tags"`
Favourited bool `json:"favourited"`
Reblogged bool `json:"reblogged"`
Muted bool `json:"muted"`
Bookmarked bool `json:"bookmarked"`
Account Account `json:"account"`
// descendants is unexported, so it is never decoded from JSON; it is
// exposed through Descendants.
descendants []Post
}
// PostsFilter carries the options getPosts turns into query parameters of
// the statuses request.
type PostsFilter struct {
// ExcludeReplies adds exclude_replies=1 when true.
ExcludeReplies bool
// ExcludeReblogs adds exclude_reblogs=1 when true.
ExcludeReblogs bool
// Limit is sent as the limit parameter.
Limit int
// SinceId, MinId and MaxId are sent as since_id, min_id and max_id when
// non-empty.
SinceId string
MinId string
MaxId string
}
// New resolves the account referenced by userURL, fetches its posts using
// filters and, when threaded is true, groups replies under their parents.
//
// userURL must carry a scheme, a host and a path naming the account, for
// example "https://example.social/@handle". The leading "@" is optional.
//
// It returns an error when the URL is unusable or when either the account
// lookup or the posts request fails; the returned Client is the zero value
// in that case.
func New(userURL string, filters PostsFilter, threaded bool) (Client, error) {
	parsedURL, err := url.Parse(userURL)
	if err != nil {
		return Client{}, fmt.Errorf("error parsing user url: %w", err)
	}

	// url.Parse accepts almost any input (including ""), so make sure the
	// pieces needed to build API URLs are actually present.
	if parsedURL.Scheme == "" || parsedURL.Host == "" {
		return Client{}, fmt.Errorf("error parsing user url: %q is missing a scheme or host", userURL)
	}

	baseURL := fmt.Sprintf("%s://%s", parsedURL.Scheme, parsedURL.Host)

	// Trim slashes on both ends so "https://host/@handle/" works too.
	handle := strings.TrimPrefix(strings.Trim(parsedURL.Path, "/"), "@")
	if handle == "" {
		return Client{}, fmt.Errorf("error parsing user url: %q is missing an account handle", userURL)
	}

	account, err := getAccount(baseURL, handle)
	if err != nil {
		return Client{}, err
	}

	posts, err := getPosts(baseURL, account.Id, filters)
	if err != nil {
		return Client{}, err
	}

	client := Client{
		baseURL:   baseURL,
		handle:    handle,
		filters:   filters,
		account:   account,
		posts:     posts,
		postIdMap: make(map[string]Post),
		replies:   make(map[string]Post),
	}

	client.populateIdMap()

	if threaded {
		client.generateReplies()
	}

	for _, orphan := range client.orphans {
		log.Printf("Orphan: %s", orphan)
	}

	return client, nil
}
// Account returns the account that was looked up when the client was built.
func (c Client) Account() Account {
	resolved := c.account
	return resolved
}
// Posts returns the posts held by the client. The slice is not copied.
func (c Client) Posts() []Post {
	fetched := c.posts
	return fetched
}
// ShouldSkip reports whether the post has any visibility other than "public".
func (p Post) ShouldSkip() bool {
	isPublic := p.Visibility == "public"
	return !isPublic
}
// Descendants returns the posts threaded under p. The slice is not copied.
func (p Post) Descendants() []Post {
	thread := p.descendants
	return thread
}
// populateIdMap indexes every fetched post by its Id.
//
// The value receiver is sufficient here: the map header is copied but still
// refers to the same underlying map, so the writes are visible to the caller.
func (c Client) populateIdMap() {
	for i := range c.posts {
		entry := c.posts[i]
		c.postIdMap[entry.Id] = entry
	}
}
// flushReplies follows the reply chain that starts at post and appends each
// reply, in chain order, to *descendants. It stops at the first post that
// has no entry in c.replies.
func (c Client) flushReplies(post Post, descendants *[]Post) {
	parentId := post.Id
	for {
		next, found := c.replies[parentId]
		if !found {
			return
		}
		*descendants = append(*descendants, next)
		parentId = next.Id
	}
}
// generateReplies threads the fetched posts: every post that is not itself a
// reply gets its chain of replies stored in its descendants.
//
// It works in two passes so the result does not depend on the order of
// c.posts:
//
//  1. Every reply whose parent is among the fetched posts is registered in
//     c.replies under the parent's id. Replies whose parent was not fetched
//     are recorded in c.orphans.
//  2. For each post that is not a reply, the reply chain is collected into
//     that post's descendants.
//
// The receiver is a pointer because c.orphans is re-assigned by append; with
// a value receiver those additions would be lost. Descendants are written
// through the slice element (not a loop copy) for the same reason.
//
// c.replies holds a single post per parent, so when several posts reply to
// the same parent only the last one visited is kept.
func (c *Client) generateReplies() {
	for _, post := range c.posts {
		if post.InReplyToId == "" {
			continue
		}

		// A reply is only threadable when its parent is one of the fetched
		// posts; look up the parent id, not the reply's own id.
		if _, ok := c.postIdMap[post.InReplyToId]; !ok {
			log.Println("Found orphan")
			c.orphans = append(c.orphans, post.Id)
			continue
		}

		log.Printf("Adding %s to replies of %s", post.Id, post.InReplyToId)
		c.replies[post.InReplyToId] = post
	}

	for i := range c.posts {
		root := &c.posts[i]
		if root.InReplyToId != "" {
			continue
		}

		// Start from a clean slate so calling this twice does not duplicate
		// the thread.
		root.descendants = nil
		c.flushReplies(*root, &root.descendants)
	}
}
// getPosts fetches the statuses of accountId from the instance at baseURL,
// translating filters into query parameters.
//
// Boolean filters are sent as "1" when set, id filters are sent when
// non-empty, and limit is sent only when positive so that a zero-value
// PostsFilter falls back to the server's default page size instead of
// asking for zero posts.
//
// It returns the decoded posts, or a nil slice and the error from the
// request.
func getPosts(baseURL string, accountId string, filters PostsFilter) ([]Post, error) {
	queryValues := url.Values{}

	if filters.ExcludeReplies {
		queryValues.Set("exclude_replies", "1")
	}

	if filters.ExcludeReblogs {
		queryValues.Set("exclude_reblogs", "1")
	}

	if filters.SinceId != "" {
		queryValues.Set("since_id", filters.SinceId)
	}

	if filters.MaxId != "" {
		queryValues.Set("max_id", filters.MaxId)
	}

	if filters.MinId != "" {
		queryValues.Set("min_id", filters.MinId)
	}

	if filters.Limit > 0 {
		queryValues.Set("limit", strconv.Itoa(filters.Limit))
	}

	// The endpoint path has no trailing slash; the query string is attached
	// directly to it.
	postsUrl := fmt.Sprintf(
		"%s/api/v1/accounts/%s/statuses",
		baseURL,
		url.PathEscape(accountId),
	)
	if encoded := queryValues.Encode(); encoded != "" {
		postsUrl += "?" + encoded
	}

	log.Printf("Fetching posts from %s", postsUrl)

	var posts []Post
	if err := get(postsUrl, &posts); err != nil {
		return nil, err
	}

	return posts, nil
}
// getAccount looks up the account for handle on the instance at baseURL.
//
// handle is query-escaped before being placed in the URL so that handles
// containing reserved characters cannot alter the query string.
//
// It returns the decoded account, or the zero Account and the error from
// the request.
func getAccount(baseURL string, handle string) (Account, error) {
	lookupUrl := fmt.Sprintf(
		"%s/api/v1/accounts/lookup?acct=%s",
		baseURL,
		url.QueryEscape(handle),
	)

	var account Account
	if err := get(lookupUrl, &account); err != nil {
		return Account{}, err
	}

	return account, nil
}
// AllTags returns the tags of p followed by the tags of each of its
// descendants, in order. Duplicates are kept. The result is nil when there
// are no tags at all.
func (p Post) AllTags() []Tag {
	var collected []Tag
	collected = append(collected, p.Tags...)

	for i := range p.descendants {
		collected = append(collected, p.descendants[i].Tags...)
	}

	return collected
}
// AllMedia returns the media attachments of p followed by those of each of
// its descendants, in order. The result is nil when there are none.
func (p Post) AllMedia() []MediaAttachment {
	var collected []MediaAttachment
	collected = append(collected, p.MediaAttachments...)

	for i := range p.descendants {
		collected = append(collected, p.descendants[i].MediaAttachments...)
	}

	return collected
}
// get performs a GET request against requestUrl and decodes the JSON
// response body into variable, which must be a pointer.
//
// It returns an error when the request fails, when the server answers with
// a non-2xx status, when the body cannot be read, or when the body is not
// valid JSON for variable.
func get(requestUrl string, variable interface{}) error {
	// A zero-value http.Client never times out; bound the request so a
	// stalled server cannot hang the program forever.
	httpClient := &http.Client{Timeout: 30 * time.Second}

	res, err := httpClient.Get(requestUrl)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	// Error responses usually carry a JSON body too; without this check it
	// would be decoded into variable and reported as success.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		return fmt.Errorf("GET %s: unexpected status %s", requestUrl, res.Status)
	}

	body, err := io.ReadAll(res.Body)
	if err != nil {
		return fmt.Errorf("reading response from %s: %w", requestUrl, err)
	}

	if err := json.Unmarshal(body, variable); err != nil {
		return fmt.Errorf("decoding response from %s: %w", requestUrl, err)
	}

	return nil
}
>>>>>>> Stashed changes

View file

@ -1,3 +1,4 @@
<<<<<<< Updated upstream
package files package files
import ( import (
@ -193,3 +194,235 @@ func resolveTemplate(templateFile string) (*template.Template, error) {
return tmpl, nil return tmpl, nil
} }
=======
package files
import (
"embed"
"fmt"
"io"
"mime"
"net/http"
"os"
"path/filepath"
"text/template"
"git.garrido.io/gabriel/mastodon-markdown-archive/client"
md "github.com/JohannesKaufmann/html-to-markdown"
)
// templates holds the default post template embedded into the binary; it is
// used by resolveTemplate when no template file is given.
//
//go:embed templates/post.tmpl
var templates embed.FS
// FileWriter writes posts as files under a single directory.
type FileWriter struct {
// dir is the absolute path of the output directory (set by New).
dir string
}
// TemplateContext is the data passed to the post template on execution.
type TemplateContext struct {
Post client.Post
}
// PostFile describes the file created for a post.
type PostFile struct {
// Dir is the directory containing the file; attachments are downloaded here.
Dir string
// Name is the full path of the created file.
Name string
// File is the open handle returned by os.Create; the caller closes it.
File *os.File
}
// New returns a FileWriter rooted at dir, creating the directory (and any
// missing parents) when it does not exist yet.
//
// It returns an error when dir cannot be made absolute or cannot be
// created, for instance because the path exists but is not a directory.
func New(dir string) (FileWriter, error) {
	absDir, err := filepath.Abs(dir)
	if err != nil {
		return FileWriter{}, err
	}

	// MkdirAll is a no-op when the directory already exists, so no prior
	// Stat is needed, and its error is reported instead of being dropped.
	if err := os.MkdirAll(absDir, os.ModePerm); err != nil {
		return FileWriter{}, err
	}

	return FileWriter{dir: absDir}, nil
}
// Write renders post with the template at templateFile (or the embedded
// default when templateFile is empty) into a new file, after downloading the
// media attachments of the post and of its descendants next to it.
//
// The template is resolved before anything is created on disk, so an
// invalid template does not leave an empty file behind.
//
// It returns the first error encountered while resolving the template,
// creating the file, downloading attachments, executing the template or
// closing the file.
func (f FileWriter) Write(post client.Post, templateFile string) error {
	tmpl, err := resolveTemplate(templateFile)
	if err != nil {
		return err
	}

	hasMedia := len(post.AllMedia()) > 0
	postFile, err := f.createFile(post, hasMedia)
	if err != nil {
		return err
	}
	// Safety net for the early returns below; the explicit Close at the end
	// is the one whose error is reported.
	defer postFile.File.Close()

	// downloadAttachments is a no-op for an empty slice, so no length check
	// is needed.
	if err := downloadAttachments(post.MediaAttachments, postFile.Dir); err != nil {
		return err
	}

	for _, descendant := range post.Descendants() {
		if err := downloadAttachments(descendant.MediaAttachments, postFile.Dir); err != nil {
			return err
		}
	}

	if err := tmpl.Execute(postFile.File, TemplateContext{Post: post}); err != nil {
		return err
	}

	return postFile.File.Close()
}
// createFile creates the output file for post and returns it together with
// its path and containing directory.
//
// When shouldBundle is true the post gets its own directory named after the
// post id, containing "index.md". Otherwise the file is "<id>.md" directly
// inside the writer's directory.
//
// It returns an error when the bundle directory or the file cannot be
// created.
func (f FileWriter) createFile(post client.Post, shouldBundle bool) (PostFile, error) {
	dir := f.dir
	name := filepath.Join(dir, fmt.Sprintf("%s.md", post.Id))

	if shouldBundle {
		dir = filepath.Join(f.dir, post.Id)
		// MkdirAll tolerates an existing directory and surfaces real
		// failures instead of letting os.Create fail later with a less
		// helpful error.
		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
			return PostFile{}, err
		}
		name = filepath.Join(dir, "index.md")
	}

	file, err := os.Create(name)
	if err != nil {
		return PostFile{}, err
	}

	return PostFile{
		Name: name,
		Dir:  dir,
		File: file,
	}, nil
}
// downloadAttachments downloads every attachment of type "image" into dir
// and stores the resulting file name in that attachment's Path. Other
// attachment types are skipped.
//
// Elements are updated in place through the slice, so the caller sees the
// assigned Path values. It stops at, and returns, the first download error.
func downloadAttachments(attachments []client.MediaAttachment, dir string) error {
	for idx := range attachments {
		if attachments[idx].Type == "image" {
			savedAs, err := downloadAttachment(dir, attachments[idx].Id, attachments[idx].URL)
			if err != nil {
				return err
			}
			attachments[idx].Path = savedAs
		}
	}

	return nil
}
// downloadAttachment downloads the media at url into dir and returns the
// name of the created file, "<id><extension>".
//
// The extension is chosen from the extensions registered for the response's
// Content-Type: the one matching the URL path's extension when there is a
// match, otherwise the first registered one. The URL's query string is not
// considered part of the extension.
//
// It returns an error when the request cannot be built or fails, when the
// server answers with a non-2xx status, when no extension is known for the
// content type, or when the file cannot be written.
func downloadAttachment(dir string, id string, url string) (string, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("Accept", "image/*")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer res.Body.Close()

	// Do not save an error page as if it were the image.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		return "", fmt.Errorf("downloading media %s: unexpected status %s", url, res.Status)
	}

	extensions, err := mime.ExtensionsByType(res.Header.Get("Content-Type"))
	if err != nil {
		return "", err
	}

	// Use the parsed path so a query string ("pic.png?123") does not end up
	// in the extension.
	urlExtension := filepath.Ext(req.URL.Path)

	var extension string
	for _, candidate := range extensions {
		if candidate == urlExtension {
			extension = candidate
			break
		}
	}

	// URLs without a recognisable extension still get a valid one for the
	// reported content type.
	if extension == "" && len(extensions) > 0 {
		extension = extensions[0]
	}

	if extension == "" {
		return "", fmt.Errorf("could not match extension for media")
	}

	filename := fmt.Sprintf("%s%s", id, extension)
	file, err := os.Create(filepath.Join(dir, filename))
	if err != nil {
		return "", err
	}

	if _, err := io.Copy(file, res.Body); err != nil {
		file.Close()
		return "", err
	}

	// Close explicitly: a failed close can mean the data was not flushed.
	if err := file.Close(); err != nil {
		return "", err
	}

	return filename, nil
}
// resolveTemplate parses the post template and registers the "tomd"
// function (HTML to Markdown conversion) on it.
//
// With an empty templateFile the embedded templates are parsed under the
// name "post.tmpl". Otherwise templateFile is parsed and the template is
// named after its base name. The parse result and error are returned as-is.
func resolveTemplate(templateFile string) (*template.Template, error) {
	funcs := template.FuncMap{
		"tomd": md.NewConverter("", true, nil).ConvertString,
	}

	if templateFile != "" {
		return template.New(filepath.Base(templateFile)).Funcs(funcs).ParseGlob(templateFile)
	}

	return template.New("post.tmpl").Funcs(funcs).ParseFS(templates, "templates/*.tmpl")
}
>>>>>>> Stashed changes

View file

@ -5,27 +5,33 @@ in_reply_to: {{ .Post.InReplyToId }}
{{- end }} {{- end }}
post_uri: {{ .Post.URI }} post_uri: {{ .Post.URI }}
post_id: {{ .Post.Id }} post_id: {{ .Post.Id }}
{{- if len .Tags }} {{- if len .Post.AllTags }}
tags: tags:
{{- range .Tags }} {{- range .Post.AllTags }}
- {{ .Name }} - {{ .Name }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if len .Descendants }} {{- if len .Post.Descendants }}
descendants: descendants:
{{- range .Descendants }} {{- range .Post.Descendants }}
- {{ .Id }} - {{ .Id }}
{{- end }} {{- end }}
{{- end }} {{- end }}
--- ---
{{ .Post.Content | tomd }} {{ .Post.Content | tomd }}
<<<<<<< Updated upstream
{{- range .Post.MediaAttachments }} {{- range .Post.MediaAttachments }}
{{ if eq .Type "image" }} {{ if eq .Type "image" }}
=======
{{ range .Post.MediaAttachments }}
{{- if eq .Type "image" }}
>>>>>>> Stashed changes
![{{ .Description }}]({{ .Path }}) ![{{ .Description }}]({{ .Path }})
{{ end }} {{ end }}
{{- end -}} {{- end -}}
<<<<<<< Updated upstream
{{- range .Descendants }} {{- range .Descendants }}
{{ .Content | tomd -}} {{ .Content | tomd -}}
{{- range .MediaAttachments }} {{- range .MediaAttachments }}
@ -34,3 +40,13 @@ descendants:
{{ end }} {{ end }}
{{- end -}} {{- end -}}
{{- end -}} {{- end -}}
=======
{{ range .Post.Descendants }}
{{ .Content | tomd }}
{{ range .MediaAttachments }}
{{- if eq .Type "image" }}
![{{ .Description }}]({{ .Path }})
{{- end }}
{{- end }}
{{- end }}
>>>>>>> Stashed changes

96
main.go
View file

@ -1,3 +1,4 @@
<<<<<<< Updated upstream
package main package main
import ( import (
@ -101,3 +102,98 @@ func persistId(postId string, path string) error {
return nil return nil
} }
=======
package main
import (
"flag"
"fmt"
"log"
"os"
"path/filepath"
"git.garrido.io/gabriel/mastodon-markdown-archive/client"
"git.garrido.io/gabriel/mastodon-markdown-archive/files"
)
// main parses the command-line flags, fetches the posts of the given
// account, writes every public post to the output directory and optionally
// persists the ids of the first and last fetched posts.
func main() {
	dist := flag.String("dist", "./posts", "Path to directory where files will be written")
	user := flag.String("user", "", "URL of User's Mastodon account whose toots will be fetched")
	excludeReplies := flag.Bool("exclude-replies", false, "Whether or not exclude replies to other users")
	excludeReblogs := flag.Bool("exclude-reblogs", false, "Whether or not to exclude reblogs")
	limit := flag.Int("limit", 40, "Maximum number of posts to fetch")
	sinceId := flag.String("since-id", "", "Fetch posts greater than this id")
	maxId := flag.String("max-id", "", "Fetch posts lesser than this id")
	minId := flag.String("min-id", "", "Fetch posts immediately newer than this id")
	persistFirst := flag.String("persist-first", "", "Location to persist the post id of the first post returned")
	persistLast := flag.String("persist-last", "", "Location to persist the post id of the last post returned")
	templateFile := flag.String("template", "", "Template to use for post rendering, if passed")
	threaded := flag.Bool("threaded", true, "Thread replies for a post in a single file")

	flag.Parse()

	// Without an account URL there is nothing to fetch; fail with usage
	// information instead of a confusing request error further down.
	if *user == "" {
		fmt.Fprintln(os.Stderr, "missing required -user flag")
		flag.Usage()
		os.Exit(2)
	}

	c, err := client.New(*user, client.PostsFilter{
		ExcludeReplies: *excludeReplies,
		ExcludeReblogs: *excludeReblogs,
		Limit:          *limit,
		SinceId:        *sinceId,
		MaxId:          *maxId,
		MinId:          *minId,
	}, *threaded)
	if err != nil {
		log.Panicln(err)
	}

	fileWriter, err := files.New(*dist)
	if err != nil {
		log.Panicln(err)
	}

	posts := c.Posts()
	postsCount := len(posts)

	log.Printf("Fetched %d posts", postsCount)

	for _, post := range posts {
		if post.ShouldSkip() {
			continue
		}

		if err := fileWriter.Write(post, *templateFile); err != nil {
			// Panicf, not Panicln: the message contains a format verb.
			log.Panicf("error writing post to file: %v", err)
		}
	}

	if postsCount > 0 {
		if *persistFirst != "" {
			if err := persistId(posts[0].Id, *persistFirst); err != nil {
				log.Panicln(err)
			}
		}

		if *persistLast != "" {
			if err := persistId(posts[postsCount-1].Id, *persistLast); err != nil {
				log.Panicln(err)
			}
		}
	}
}
// persistId writes postId to the file at path (made absolute first),
// creating it with mode 0644 or replacing its contents when it exists.
// It returns the error from resolving the path or from writing the file.
func persistId(postId string, path string) error {
	target, err := filepath.Abs(path)
	if err == nil {
		err = os.WriteFile(target, []byte(postId), 0644)
	}

	return err
}
>>>>>>> Stashed changes