mirror of
https://github.com/adulau/mastodon-markdown-archive.git
synced 2025-01-03 06:23:15 +00:00
WIP: move threading to Client
This commit is contained in:
parent
5af659241f
commit
437db12b8d
4 changed files with 660 additions and 4 deletions
311
client/client.go
311
client/client.go
|
@ -1,3 +1,4 @@
|
|||
<<<<<<< Updated upstream
|
||||
package client
|
||||
|
||||
import (
|
||||
|
@ -215,3 +216,313 @@ func get(requestUrl string, variable interface{}) error {
|
|||
func ShouldSkipPost(post Post) bool {
|
||||
return post.Visibility != "public"
|
||||
}
|
||||
=======
|
||||
package client
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Client struct {
|
||||
handle string
|
||||
baseURL string
|
||||
filters PostsFilter
|
||||
account Account
|
||||
posts []Post
|
||||
replies map[string]Post
|
||||
orphans []string
|
||||
postIdMap map[string]Post
|
||||
}
|
||||
|
||||
// Account is the JSON shape decoded from the instance's account lookup
// endpoint and embedded in every Post.
type Account struct {
	// Identity.
	Id          string `json:"id"`
	Username    string `json:"username"`
	Acct        string `json:"acct"`
	DisplayName string `json:"display_name"`

	// Flags.
	Locked       bool `json:"locked"`
	Bot          bool `json:"bot"`
	Discoverable bool `json:"discoverable"`
	Group        bool `json:"group"`

	// Profile.
	CreatedAt    time.Time `json:"created_at"`
	Note         string    `json:"note"`
	URL          string    `json:"url"`
	URI          string    `json:"uri"`
	Avatar       string    `json:"avatar"`
	AvatarStatic string    `json:"avatar_static"`
	Header       string    `json:"header"`
	HeaderStatic string    `json:"header_static"`

	// Counters.
	FollowersCount int `json:"followers_count"`
	FollowingCount int `json:"following_count"`
	StatusesCount  int `json:"statuses_count"`

	// LastStatusAt is kept as the raw string sent by the server.
	LastStatusAt string `json:"last_status_at"`
}
|
||||
|
||||
// MediaAttachment describes one media item attached to a post.
type MediaAttachment struct {
	Id          string `json:"id"`
	Type        string `json:"type"`
	URL         string `json:"url"`
	Description string `json:"description"`

	// Path carries no JSON tag; the file writer stores the downloaded file's
	// name here so templates can link to the local copy.
	Path string
}
|
||||
|
||||
// Application is the "application" object attached to a post: a name and a
// website, both plain strings.
type Application struct {
	Website string `json:"website"`
	Name    string `json:"name"`
}
|
||||
|
||||
// Tag is a hashtag attached to a post: its name and the URL it links to.
type Tag struct {
	URL  string `json:"url"`
	Name string `json:"name"`
}
|
||||
|
||||
type Post struct {
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
Id string `json:"id"`
|
||||
Visibility string `json:"visibility"`
|
||||
InReplyToId string `json:"in_reply_to_id"`
|
||||
InReplyToAccountId string `json:"in_reply_to_account_id"`
|
||||
Sensitive bool `json:"sensitive"`
|
||||
SpoilerText string `json:"spoiler_text"`
|
||||
Language string `json:"language"`
|
||||
URI string `json:"uri"`
|
||||
URL string `json:"url"`
|
||||
Application Application `json:"application"`
|
||||
Content string `json:"content"`
|
||||
MediaAttachments []MediaAttachment `json:"media_attachments"`
|
||||
RepliesCount int `json:"replies_count"`
|
||||
ReblogsCount int `json:"reblogs_count"`
|
||||
FavoritesCount int `json:"favourites_count"`
|
||||
Pinned bool `json:"pinned"`
|
||||
Tags []Tag `json:"tags"`
|
||||
Favourited bool `json:"favourited"`
|
||||
Reblogged bool `json:"reblogged"`
|
||||
Muted bool `json:"muted"`
|
||||
Bookmarked bool `json:"bookmarked"`
|
||||
Account Account `json:"account"`
|
||||
descendants []Post
|
||||
}
|
||||
|
||||
// PostsFilter selects which statuses are requested. Each field is translated
// into the query parameter of the corresponding name when posts are fetched;
// empty id strings and false booleans are omitted from the request.
type PostsFilter struct {
	// Paging window.
	Limit   int
	SinceId string
	MinId   string
	MaxId   string

	// Exclusions.
	ExcludeReplies bool
	ExcludeReblogs bool
}
|
||||
|
||||
func New(userURL string, filters PostsFilter, threaded bool) (Client, error) {
|
||||
var client Client
|
||||
parsedURL, err := url.Parse(userURL)
|
||||
|
||||
if err != nil {
|
||||
return client, fmt.Errorf("error parsing user url: %w", err)
|
||||
}
|
||||
|
||||
baseURL := fmt.Sprintf("%s://%s", parsedURL.Scheme, parsedURL.Host)
|
||||
acc := strings.TrimPrefix(parsedURL.Path, "/")
|
||||
handle := strings.TrimPrefix(acc, "@")
|
||||
|
||||
account, err := getAccount(baseURL, handle)
|
||||
|
||||
if err != nil {
|
||||
return client, err
|
||||
}
|
||||
|
||||
posts, err := getPosts(baseURL, account.Id, filters)
|
||||
|
||||
if err != nil {
|
||||
return client, err
|
||||
}
|
||||
|
||||
var orphans []string
|
||||
client = Client{
|
||||
baseURL: baseURL,
|
||||
handle: handle,
|
||||
filters: filters,
|
||||
account: account,
|
||||
posts: posts,
|
||||
postIdMap: make(map[string]Post),
|
||||
replies: make(map[string]Post),
|
||||
orphans: orphans,
|
||||
}
|
||||
|
||||
client.populateIdMap()
|
||||
|
||||
if threaded {
|
||||
client.generateReplies()
|
||||
}
|
||||
|
||||
for _, orphan := range client.orphans {
|
||||
log.Println(fmt.Sprintf("Orphan: %s", orphan))
|
||||
}
|
||||
|
||||
return client, nil
|
||||
}
|
||||
|
||||
func (c Client) Account() Account {
|
||||
return c.account
|
||||
}
|
||||
|
||||
func (c Client) Posts() []Post {
|
||||
return c.posts
|
||||
}
|
||||
|
||||
func (p Post) ShouldSkip() bool {
|
||||
return p.Visibility != "public"
|
||||
}
|
||||
|
||||
func (p Post) Descendants() []Post {
|
||||
return p.descendants
|
||||
}
|
||||
|
||||
func (c Client) populateIdMap() {
|
||||
for _, post := range c.posts {
|
||||
c.postIdMap[post.Id] = post
|
||||
}
|
||||
}
|
||||
|
||||
func (c Client) flushReplies(post Post, descendants *[]Post) {
|
||||
if reply, ok := c.replies[post.Id]; ok {
|
||||
*descendants = append(*descendants, reply)
|
||||
c.flushReplies(reply, descendants)
|
||||
}
|
||||
}
|
||||
|
||||
func (c Client) generateReplies() {
|
||||
for _, post := range c.posts {
|
||||
if post.InReplyToId == "" {
|
||||
c.flushReplies(post, &post.descendants)
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := c.postIdMap[post.Id]; ok {
|
||||
log.Println(fmt.Sprintf("Adding %s to replies of %s", post.Id, post.InReplyToId))
|
||||
c.replies[post.InReplyToId] = post
|
||||
} else {
|
||||
log.Println("Found orphan")
|
||||
c.orphans = append(c.orphans, post.Id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func getPosts(baseURL string, accountId string, filters PostsFilter) ([]Post, error) {
|
||||
var posts []Post
|
||||
|
||||
queryValues := url.Values{}
|
||||
|
||||
if filters.ExcludeReplies {
|
||||
queryValues.Add("exclude_replies", strconv.Itoa(1))
|
||||
}
|
||||
|
||||
if filters.ExcludeReblogs {
|
||||
queryValues.Add("exclude_reblogs", strconv.Itoa(1))
|
||||
}
|
||||
|
||||
if filters.SinceId != "" {
|
||||
queryValues.Add("since_id", filters.SinceId)
|
||||
}
|
||||
|
||||
if filters.MaxId != "" {
|
||||
queryValues.Add("max_id", filters.MaxId)
|
||||
}
|
||||
|
||||
if filters.MinId != "" {
|
||||
queryValues.Add("min_id", filters.MinId)
|
||||
}
|
||||
|
||||
queryValues.Add("limit", strconv.Itoa(filters.Limit))
|
||||
|
||||
query := fmt.Sprintf("?%s", queryValues.Encode())
|
||||
|
||||
postsUrl := fmt.Sprintf(
|
||||
"%s/api/v1/accounts/%s/statuses/%s",
|
||||
baseURL,
|
||||
accountId,
|
||||
query,
|
||||
)
|
||||
|
||||
log.Println(fmt.Sprintf("Fetching posts from %s", postsUrl))
|
||||
|
||||
if err := get(postsUrl, &posts); err != nil {
|
||||
return posts, err
|
||||
}
|
||||
|
||||
return posts, nil
|
||||
}
|
||||
|
||||
func getAccount(baseURL string, handle string) (Account, error) {
|
||||
var account Account
|
||||
lookupUrl := fmt.Sprintf(
|
||||
"%s/api/v1/accounts/lookup?acct=%s",
|
||||
baseURL,
|
||||
handle,
|
||||
)
|
||||
|
||||
err := get(lookupUrl, &account)
|
||||
|
||||
if err != nil {
|
||||
return account, err
|
||||
}
|
||||
|
||||
return account, nil
|
||||
}
|
||||
|
||||
func (p Post) AllTags() []Tag {
|
||||
var tags []Tag
|
||||
|
||||
for _, tag := range p.Tags {
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
|
||||
for _, descendant := range p.descendants {
|
||||
for _, tag := range descendant.Tags {
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
}
|
||||
|
||||
return tags
|
||||
}
|
||||
|
||||
func (p Post) AllMedia() []MediaAttachment {
|
||||
var media []MediaAttachment
|
||||
|
||||
for _, item := range p.MediaAttachments {
|
||||
media = append(media, item)
|
||||
}
|
||||
|
||||
for _, descendant := range p.descendants {
|
||||
for _, item := range descendant.MediaAttachments {
|
||||
media = append(media, item)
|
||||
}
|
||||
}
|
||||
|
||||
return media
|
||||
}
|
||||
|
||||
// get issues a GET request to requestUrl and decodes the JSON response body
// into variable, which must be a pointer.
//
// It returns an error when the request fails, when the server answers with a
// non-2xx status (so an error payload is never decoded as if it were data),
// when the body cannot be read, or when it is not valid JSON for variable.
func get(requestUrl string, variable interface{}) error {
	res, err := http.Get(requestUrl)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	if res.StatusCode < 200 || res.StatusCode > 299 {
		return fmt.Errorf("GET %s: unexpected status %s", requestUrl, res.Status)
	}

	body, err := io.ReadAll(res.Body)
	if err != nil {
		return fmt.Errorf("reading response from %s: %w", requestUrl, err)
	}

	return json.Unmarshal(body, variable)
}
|
||||
>>>>>>> Stashed changes
|
||||
|
|
233
files/files.go
233
files/files.go
|
@ -1,3 +1,4 @@
|
|||
<<<<<<< Updated upstream
|
||||
package files
|
||||
|
||||
import (
|
||||
|
@ -193,3 +194,235 @@ func resolveTemplate(templateFile string) (*template.Template, error) {
|
|||
|
||||
return tmpl, nil
|
||||
}
|
||||
=======
|
||||
package files
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"text/template"
|
||||
|
||||
"git.garrido.io/gabriel/mastodon-markdown-archive/client"
|
||||
md "github.com/JohannesKaufmann/html-to-markdown"
|
||||
)
|
||||
|
||||
//go:embed templates/post.tmpl
|
||||
var templates embed.FS
|
||||
|
||||
// FileWriter writes rendered posts, and any downloaded media, beneath a
// single output directory.
type FileWriter struct {
	// dir is the absolute path of the output directory.
	dir string
}
|
||||
|
||||
type TemplateContext struct {
|
||||
Post client.Post
|
||||
}
|
||||
|
||||
// PostFile describes the file created for one post.
type PostFile struct {
	// File is the open handle the rendered post is written to; whoever
	// receives the PostFile is responsible for closing it.
	File *os.File
	// Name is the full path of the file.
	Name string
	// Dir is the directory holding the file; attachments are saved there.
	Dir string
}
|
||||
|
||||
func New(dir string) (FileWriter, error) {
|
||||
var fileWriter FileWriter
|
||||
_, err := os.Stat(dir)
|
||||
|
||||
if os.IsNotExist(err) {
|
||||
os.Mkdir(dir, os.ModePerm)
|
||||
}
|
||||
|
||||
absDir, err := filepath.Abs(dir)
|
||||
|
||||
if err != nil {
|
||||
return fileWriter, err
|
||||
}
|
||||
|
||||
return FileWriter{
|
||||
dir: absDir,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (f FileWriter) Write(post client.Post, templateFile string) error {
|
||||
hasMedia := len(post.AllMedia()) > 0
|
||||
postFile, err := f.createFile(post, hasMedia)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer postFile.File.Close()
|
||||
|
||||
if len(post.MediaAttachments) > 0 {
|
||||
err = downloadAttachments(post.MediaAttachments, postFile.Dir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, descendant := range post.Descendants() {
|
||||
if len(descendant.MediaAttachments) > 0 {
|
||||
err = downloadAttachments(descendant.MediaAttachments, postFile.Dir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tmpl, err := resolveTemplate(templateFile)
|
||||
context := TemplateContext{
|
||||
Post: post,
|
||||
}
|
||||
|
||||
err = tmpl.Execute(postFile.File, context)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f FileWriter) createFile(post client.Post, shouldBundle bool) (PostFile, error) {
|
||||
var postFile PostFile
|
||||
|
||||
if shouldBundle {
|
||||
dir := filepath.Join(f.dir, post.Id)
|
||||
|
||||
_, err := os.Stat(dir)
|
||||
if os.IsNotExist(err) {
|
||||
os.Mkdir(dir, os.ModePerm)
|
||||
}
|
||||
|
||||
name := filepath.Join(dir, "index.md")
|
||||
file, err := os.Create(name)
|
||||
|
||||
if err != nil {
|
||||
return postFile, err
|
||||
}
|
||||
|
||||
postFile = PostFile{
|
||||
Name: name,
|
||||
Dir: dir,
|
||||
File: file,
|
||||
}
|
||||
|
||||
return postFile, nil
|
||||
}
|
||||
|
||||
name := filepath.Join(f.dir, fmt.Sprintf("%s.md", post.Id))
|
||||
file, err := os.Create(name)
|
||||
|
||||
if err != nil {
|
||||
return postFile, err
|
||||
}
|
||||
|
||||
postFile = PostFile{
|
||||
Name: name,
|
||||
Dir: f.dir,
|
||||
File: file,
|
||||
}
|
||||
|
||||
return postFile, nil
|
||||
}
|
||||
|
||||
func downloadAttachments(attachments []client.MediaAttachment, dir string) error {
|
||||
for i := 0; i < len(attachments); i++ {
|
||||
media := &attachments[i]
|
||||
if media.Type != "image" {
|
||||
continue
|
||||
}
|
||||
|
||||
imageFilename, err := downloadAttachment(dir, media.Id, media.URL)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
media.Path = imageFilename
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// downloadAttachment fetches the media at url and stores it in dir as
// "<id><ext>", returning that file name (without the directory).
//
// The extension is the one found in the URL path (any query string or
// fragment is ignored) and must be among the extensions registered for the
// response's Content-Type.
//
// An error is returned when the request cannot be built or sent, when the
// server does not answer 200 OK, when the Content-Type is malformed, when no
// extension matches, or when the file cannot be created or fully written. On
// error the returned name is empty.
func downloadAttachment(dir string, id string, url string) (string, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("Accept", "image/*")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer res.Body.Close()

	// Never store an error page as if it were the image.
	if res.StatusCode != http.StatusOK {
		return "", fmt.Errorf("downloading %s: unexpected status %s", url, res.Status)
	}

	contentType := res.Header.Get("Content-Type")
	extensions, err := mime.ExtensionsByType(contentType)
	if err != nil {
		return "", err
	}

	// Use the parsed path so that "pic.png?v=2" still yields ".png".
	urlExtension := filepath.Ext(req.URL.Path)

	var extension string
	for _, candidate := range extensions {
		if candidate == urlExtension {
			extension = candidate
			break
		}
	}

	if extension == "" {
		return "", fmt.Errorf("could not match extension for media")
	}

	filename := fmt.Sprintf("%s%s", id, extension)

	file, err := os.Create(filepath.Join(dir, filename))
	if err != nil {
		return "", err
	}

	if _, err := io.Copy(file, res.Body); err != nil {
		file.Close()
		return "", err
	}

	// A failed Close can mean the data never reached the disk.
	if err := file.Close(); err != nil {
		return "", err
	}

	return filename, nil
}
|
||||
|
||||
func resolveTemplate(templateFile string) (*template.Template, error) {
|
||||
converter := md.NewConverter("", true, nil)
|
||||
|
||||
funcs := template.FuncMap{
|
||||
"tomd": converter.ConvertString,
|
||||
}
|
||||
|
||||
if templateFile == "" {
|
||||
tmpl, err := template.New("post.tmpl").Funcs(funcs).ParseFS(templates, "templates/*.tmpl")
|
||||
|
||||
if err != nil {
|
||||
return tmpl, err
|
||||
}
|
||||
|
||||
return tmpl, nil
|
||||
}
|
||||
|
||||
tmpl, err := template.New(filepath.Base(templateFile)).Funcs(funcs).ParseGlob(templateFile)
|
||||
|
||||
if err != nil {
|
||||
return tmpl, err
|
||||
}
|
||||
|
||||
return tmpl, nil
|
||||
}
|
||||
>>>>>>> Stashed changes
|
||||
|
|
|
@ -5,27 +5,33 @@ in_reply_to: {{ .Post.InReplyToId }}
|
|||
{{- end }}
|
||||
post_uri: {{ .Post.URI }}
|
||||
post_id: {{ .Post.Id }}
|
||||
{{- if len .Tags }}
|
||||
{{- if len .Post.AllTags }}
|
||||
tags:
|
||||
{{- range .Tags }}
|
||||
{{- range .Post.AllTags }}
|
||||
- {{ .Name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if len .Descendants }}
|
||||
{{- if len .Post.Descendants }}
|
||||
descendants:
|
||||
{{- range .Descendants }}
|
||||
{{- range .Post.Descendants }}
|
||||
- {{ .Id }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
---
|
||||
{{ .Post.Content | tomd }}
|
||||
<<<<<<< Updated upstream
|
||||
|
||||
{{- range .Post.MediaAttachments }}
|
||||
{{ if eq .Type "image" }}
|
||||
=======
|
||||
{{ range .Post.MediaAttachments }}
|
||||
{{- if eq .Type "image" }}
|
||||
>>>>>>> Stashed changes
|
||||
![{{ .Description }}]({{ .Path }})
|
||||
{{ end }}
|
||||
{{- end -}}
|
||||
|
||||
<<<<<<< Updated upstream
|
||||
{{- range .Descendants }}
|
||||
{{ .Content | tomd -}}
|
||||
{{- range .MediaAttachments }}
|
||||
|
@ -34,3 +40,13 @@ descendants:
|
|||
{{ end }}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
=======
|
||||
{{ range .Post.Descendants }}
|
||||
{{ .Content | tomd }}
|
||||
{{ range .MediaAttachments }}
|
||||
{{- if eq .Type "image" }}
|
||||
![{{ .Description }}]({{ .Path }})
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
>>>>>>> Stashed changes
|
||||
|
|
96
main.go
96
main.go
|
@ -1,3 +1,4 @@
|
|||
<<<<<<< Updated upstream
|
||||
package main
|
||||
|
||||
import (
|
||||
|
@ -101,3 +102,98 @@ func persistId(postId string, path string) error {
|
|||
|
||||
return nil
|
||||
}
|
||||
=======
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"git.garrido.io/gabriel/mastodon-markdown-archive/client"
|
||||
"git.garrido.io/gabriel/mastodon-markdown-archive/files"
|
||||
)
|
||||
|
||||
func main() {
|
||||
dist := flag.String("dist", "./posts", "Path to directory where files will be written")
|
||||
user := flag.String("user", "", "URL of User's Mastodon account whose toots will be fetched")
|
||||
excludeReplies := flag.Bool("exclude-replies", false, "Whether or not exclude replies to other users")
|
||||
excludeReblogs := flag.Bool("exclude-reblogs", false, "Whether or not to exclude reblogs")
|
||||
limit := flag.Int("limit", 40, "Maximum number of posts to fetch")
|
||||
sinceId := flag.String("since-id", "", "Fetch posts greater than this id")
|
||||
maxId := flag.String("max-id", "", "Fetch posts lesser than this id")
|
||||
minId := flag.String("min-id", "", "Fetch posts immediately newer than this id")
|
||||
persistFirst := flag.String("persist-first", "", "Location to persist the post id of the first post returned")
|
||||
persistLast := flag.String("persist-last", "", "Location to persist the post id of the last post returned")
|
||||
templateFile := flag.String("template", "", "Template to use for post rendering, if passed")
|
||||
threaded := flag.Bool("threaded", true, "Thread replies for a post in a single file")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
c, err := client.New(*user, client.PostsFilter{
|
||||
ExcludeReplies: *excludeReplies,
|
||||
ExcludeReblogs: *excludeReblogs,
|
||||
Limit: *limit,
|
||||
SinceId: *sinceId,
|
||||
MaxId: *maxId,
|
||||
MinId: *minId,
|
||||
}, *threaded)
|
||||
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
|
||||
fileWriter, err := files.New(*dist)
|
||||
posts := c.Posts()
|
||||
postsCount := len(posts)
|
||||
|
||||
log.Println(fmt.Sprintf("Fetched %d posts", postsCount))
|
||||
|
||||
for _, post := range posts {
|
||||
if post.ShouldSkip() {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := fileWriter.Write(post, *templateFile); err != nil {
|
||||
log.Panicln("error writing post to file: %w", err)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if postsCount > 0 {
|
||||
if *persistFirst != "" {
|
||||
firstPost := posts[0]
|
||||
err := persistId(firstPost.Id, *persistFirst)
|
||||
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
}
|
||||
|
||||
if *persistLast != "" {
|
||||
lastPost := posts[postsCount-1]
|
||||
err := persistId(lastPost.Id, *persistLast)
|
||||
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// persistId stores postId as the sole content of the file at path, creating
// the file with mode 0644 or replacing whatever it held. The path is made
// absolute before writing. The error from resolving the path or from the
// write is returned.
func persistId(postId string, path string) error {
	target, absErr := filepath.Abs(path)
	if absErr != nil {
		return absErr
	}

	return os.WriteFile(target, []byte(postId), 0644)
}
|
||||
>>>>>>> Stashed changes
|
||||
|
|
Loading…
Reference in a new issue