john revised this gist . Go to revision
1 file changed, 196 insertions, 5 deletions
ghost2md.java
@@ -24,10 +24,20 @@ import io.github.furstenheim.CopyDown; | |||
24 | 24 | import picocli.CommandLine; | |
25 | 25 | import picocli.CommandLine.Command; | |
26 | 26 | import picocli.CommandLine.Option; | |
27 | + | ||
28 | + | import static java.util.Optional.ofNullable; | |
29 | + | ||
27 | 30 | import java.io.File; | |
31 | + | import java.io.FileOutputStream; | |
28 | 32 | import java.io.FileWriter; | |
33 | + | import java.io.IOException; | |
34 | + | import java.io.PrintWriter; | |
35 | + | import java.net.URI; | |
36 | + | import java.nio.file.Paths; | |
29 | 37 | import java.util.List; | |
38 | + | import java.util.Optional; | |
30 | 39 | import java.util.concurrent.Callable; | |
40 | + | import java.util.regex.Pattern; | |
31 | 41 | ||
32 | 42 | @Command(name = "ghost2md", mixinStandardHelpOptions = true, version = "0.1", description = "Converts a Ghost export file to markdown files") | |
33 | 43 | public class ghost2md implements Callable<Integer> { | |
@@ -41,6 +51,13 @@ public class ghost2md implements Callable<Integer> { | |||
41 | 51 | @Option(names = { "--dir", "-d" }, defaultValue = "exported", description = "The destination directory") | |
42 | 52 | private File destinationDir; | |
43 | 53 | ||
54 | + | @Option(names = { "--type", "-t" }, defaultValue = "all", description = "Type of entries to export (all,page,post)") | |
55 | + | private String type; | |
56 | + | ||
57 | + | @Option(names = { "--domain" }, defaultValue = "", description = "The domain of Ghost blog (used to download linked resources)") | |
58 | + | private String domain; | |
59 | + | ||
60 | + | ||
44 | 61 | @Override | |
45 | 62 | public Integer call() throws Exception { | |
46 | 63 | if (!exportFile.exists()) { | |
@@ -52,27 +69,201 @@ public class ghost2md implements Callable<Integer> { | |||
52 | 69 | ||
53 | 70 | var json = JSON.parse(exportFile); | |
54 | 71 | var posts = json.read("$.db[*].data.posts[*]", new TypeRef<List<Post>>(){}); | |
72 | + | var postMeta = json.read("$.db[*].data.posts_meta[*]", new TypeRef<List<PostMeta>>(){}); | |
73 | + | var tags = json.read("$.db[*].data.tags[*]", new TypeRef<List<Tag>>(){}); | |
74 | + | var tagLinks = json.read("$.db[*].data.posts_tags[*]", new TypeRef<List<TagLink>>(){}); | |
75 | + | var authors = json.read("$.db[*].data.users[*]", new TypeRef<List<Author>>(){}); | |
76 | + | var authorLinks = json.read("$.db[*].data.posts_authors[*]", new TypeRef<List<AuthorLink>>(){}); | |
77 | + | ||
55 | 78 | for (var post : posts) { | |
79 | + | if (!"all".equals(type) && !post.type().equals(type)) { | |
80 | + | continue; | |
81 | + | } | |
82 | + | ||
56 | 83 | var filename = post.slug() + ".md"; | |
57 | 84 | var markdown = HTML_TO_MD.convert(post.html()); | |
58 | 85 | var file = new File(destinationDir, filename); | |
59 | 86 | var title = "# " + post.title(); | |
87 | + | ||
88 | + | var meta = postMeta.stream() | |
89 | + | .filter(link -> link.post_id().equals(post.id())) | |
90 | + | .findFirst(); | |
91 | + | ||
92 | + | var linkedTags = tagLinks.stream() | |
93 | + | .filter(link -> link.post_id().equals(post.id())) | |
94 | + | .flatMap(link -> tags.stream().filter(tag -> tag.id().equals(link.tag_id()))) | |
95 | + | .toList(); | |
96 | + | ||
97 | + | var linkedAuthors = authorLinks.stream() | |
98 | + | .filter(link -> link.post_id().equals(post.id())) | |
99 | + | .flatMap(link -> authors.stream().filter(author -> author.id().equals(link.author_id()))) | |
100 | + | .toList(); | |
101 | + | ||
60 | 102 | System.out.printf("Writing %s\n", file); | |
61 | - | try (var writer = new FileWriter(file)) { | |
62 | - | writer.write(title); | |
63 | - | writer.write("\n\n"); | |
64 | - | writer.write(markdown); | |
103 | + | try (var writer = new PrintWriter(new FileWriter(file), true)) { | |
104 | + | writer.println("---"); | |
105 | + | ||
106 | + | writer.println("date:"); | |
107 | + | ofNullable(post.created_at()).ifPresent(date -> writer.println(" created: " + date)); | |
108 | + | ofNullable(post.updated_at()).ifPresent(date -> writer.println(" updated: " + date)); | |
109 | + | ofNullable(post.published_at()).ifPresent(date -> writer.println(" published: " + date)); | |
110 | + | ||
111 | + | if (post.custom_excerpt() != null) { | |
112 | + | writer.println("excerpt: \"" + post.custom_excerpt() + "\""); | |
113 | + | } else if(meta.isPresent() && meta.get().title().isPresent()) { | |
114 | + | writer.println("excerpt: \"" + meta.get().title().get() + "\""); | |
115 | + | } else { | |
116 | + | writer.println("excerpt: \"" + post.title() + "\""); | |
117 | + | } | |
118 | + | ||
119 | + | if (post.featured() > 0) { | |
120 | + | writer.println("pin: true"); | |
121 | + | } | |
122 | + | ||
123 | + | if (post.feature_image() != null) { | |
124 | + | var url = post.feature_image(); | |
125 | + | if (url.startsWith("__GHOST_URL__")) { | |
126 | + | url = downloadAsset(url.replace("__GHOST_URL__", domain)); | |
127 | + | } | |
128 | + | writer.println("cover: " + url); | |
129 | + | } else if (meta.isPresent() && meta.get().image().isPresent()) { | |
130 | + | var url = meta.get().image().get(); | |
131 | + | if (url.startsWith("__GHOST_URL__")) { | |
132 | + | url = downloadAsset(url.replace("__GHOST_URL__", domain)); | |
133 | + | } | |
134 | + | writer.println("cover: " + url); | |
135 | + | } else if(!linkedTags.isEmpty()){ | |
136 | + | var url = linkedTags.stream() | |
137 | + | .filter(tag -> tag.feature_image() != null) | |
138 | + | .map(tag -> tag.feature_image()) | |
139 | + | .findFirst() | |
140 | + | .get(); | |
141 | + | if (url.startsWith("__GHOST_URL__")) { | |
142 | + | url = downloadAsset(url.replace("__GHOST_URL__", domain)); | |
143 | + | } | |
144 | + | writer.println("cover: " + url); | |
145 | + | ||
146 | + | } | |
147 | + | ||
148 | + | if (post.status.equals("draft")) { | |
149 | + | writer.println("draft: true"); | |
150 | + | } | |
151 | + | ||
152 | + | if (!linkedTags.isEmpty() || post.featured() > 0) { | |
153 | + | ||
154 | + | writer.println("tags:"); | |
155 | + | ||
156 | + | for (var tag : linkedTags) { | |
157 | + | writer.println(" - slug: " + tag.slug()); | |
158 | + | writer.println(" title: \"" + tag.name() + "\""); | |
159 | + | } | |
160 | + | } | |
161 | + | ||
162 | + | if (!linkedAuthors.isEmpty()) { | |
163 | + | ||
164 | + | writer.println("authors:"); | |
165 | + | ||
166 | + | for (var author : linkedAuthors) { | |
167 | + | writer.println(" - slug: " + author.slug()); | |
168 | + | writer.println(" name: \"" + author.name() + "\""); | |
169 | + | } | |
170 | + | } | |
171 | + | ||
172 | + | writer.println("---"); | |
173 | + | writer.println(); | |
174 | + | ||
175 | + | writer.println(title); | |
176 | + | writer.println(); | |
177 | + | ||
178 | + | var internalLinkPattern = Pattern.compile("\\(__GHOST_URL__(.*)(#(.*))?\\)"); | |
179 | + | var internalLinks = internalLinkPattern.matcher(markdown); | |
180 | + | markdown = internalLinks.replaceAll(result -> { | |
181 | + | var link = result.group(1); | |
182 | + | var anchor = result.group(2); | |
183 | + | if (link.startsWith("/content") || link.startsWith("/api")) { | |
184 | + | return "(" + downloadAsset(domain + link) + ")"; | |
185 | + | } | |
186 | + | if (link.startsWith("/")) { | |
187 | + | link = link.substring(1); | |
188 | + | } | |
189 | + | if (link.endsWith("/")) { | |
190 | + | link = link.substring(0, link.length() -1); | |
191 | + | } | |
192 | + | if (anchor != null) { | |
193 | + | return "(" + link + ".md" + "#" +anchor + ")"; | |
194 | + | } | |
195 | + | ||
196 | + | System.out.println(link); | |
197 | + | return "(" + link + ".md)"; | |
198 | + | }); | |
199 | + | ||
200 | + | if(markdown.contains("__GHOST_URL__")) { | |
201 | + | System.out.println("Failed to replace all links in markdown in " + filename); | |
202 | + | System.out.println(markdown); | |
203 | + | } | |
204 | + | ||
205 | + | writer.println(markdown); | |
65 | 206 | } | |
66 | 207 | } | |
67 | 208 | return 0; | |
68 | 209 | } | |
69 | 210 | ||
70 | 211 | @JsonIgnoreProperties(ignoreUnknown = true) | |
71 | - | record Post(String slug, String title, String html){} | |
/**
 * One entry read from the export's {@code posts} collection.
 * Component names mirror the JSON property names so they bind without extra mapping.
 */
record Post(
        String id,
        String slug,
        String title,
        String html,
        String created_at,
        String updated_at,
        String published_at,
        String type,
        int featured,
        String feature_image,
        String status,
        String custom_excerpt) {
}
213 | + | ||
214 | + | @JsonIgnoreProperties(ignoreUnknown = true) | |
/**
 * One entry read from the export's {@code posts_meta} collection, linked to a post
 * through {@code post_id}.
 */
record PostMeta(
        String post_id,
        String meta_title,
        String meta_description,
        String twitter_image,
        String og_image) {

    /**
     * Returns the meta title, falling back to the meta description.
     *
     * @return the first non-null of {@code meta_title} and {@code meta_description}, or empty
     */
    public Optional<String> title() {
        if (meta_title != null) {
            return Optional.of(meta_title);
        }
        return Optional.ofNullable(meta_description);
    }

    /**
     * Returns the Open Graph image, falling back to the Twitter image.
     *
     * @return the first non-null of {@code og_image} and {@code twitter_image}, or empty
     */
    public Optional<String> image() {
        if (og_image != null) {
            return Optional.of(og_image);
        }
        return Optional.ofNullable(twitter_image);
    }
}
225 | + | ||
226 | + | @JsonIgnoreProperties(ignoreUnknown = true) | |
/**
 * One entry read from the export's {@code tags} collection.
 */
record Tag(
        String id,
        String slug,
        String name,
        String description,
        String feature_image) {
}
228 | + | ||
229 | + | @JsonIgnoreProperties(ignoreUnknown = true) | |
/**
 * One entry read from the export's {@code posts_tags} collection: associates a post id
 * with a tag id.
 */
record TagLink(
        String post_id,
        String tag_id) {
}
231 | + | ||
232 | + | @JsonIgnoreProperties(ignoreUnknown = true) | |
233 | + | record Author(String id, String slug, String name){} | |
234 | + | ||
235 | + | @JsonIgnoreProperties(ignoreUnknown = true) | |
/**
 * One entry read from the export's {@code posts_authors} collection: associates a post id
 * with an author id.
 */
record AuthorLink(
        String post_id,
        String author_id) {
}
72 | 237 | ||
73 | 238 | public static void main(String[] args) { | |
74 | 239 | final int exitCode = new CommandLine(new ghost2md()).execute(args); | |
75 | 240 | System.exit(exitCode); | |
76 | 241 | } | |
77 | 242 | ||
243 | + | private String downloadAsset(String uriString) { | |
244 | + | var assetDir = new File(destinationDir, "assets"); | |
245 | + | if (!assetDir.exists()) { | |
246 | + | assetDir.mkdirs(); | |
247 | + | } | |
248 | + | ||
249 | + | var uri = URI.create(uriString); | |
250 | + | var file = new File(assetDir, Paths.get(uri.getPath()).getFileName().toString()); | |
251 | + | if (file.exists()) { | |
252 | + | return "assets/" + file.getName(); | |
253 | + | } | |
254 | + | ||
255 | + | System.out.println("Downloading " + uriString); | |
256 | + | try (var out = new FileOutputStream(file)) { | |
257 | + | var conn = uri.toURL().openConnection(); | |
258 | + | try (var stream = conn.getInputStream()) { | |
259 | + | stream.transferTo(out); | |
260 | + | } | |
261 | + | } catch (IOException e) { | |
262 | + | System.err.println("Failed to download file " + uriString + ". " + e.getMessage()); | |
263 | + | return uriString; | |
264 | + | } | |
265 | + | ||
266 | + | return "assets/" + file.getName(); | |
267 | + | } | |
268 | + | ||
78 | 269 | } |
john revised this gist . Go to revision
1 file changed, 78 insertions
ghost2md.java(file created)
@@ -0,0 +1,78 @@ | |||
1 | + | ///usr/bin/env jbang "$0" "$@" ; exit $? | |
2 | + | // | |
3 | + | // This is a JBang script. You need JBang to run this script. | |
4 | + | // | |
5 | + | // To learn more go to https://www.jbang.dev and install JBang. | |
6 | + | // | |
7 | + | // Run this script: | |
8 | + | // ./ghost2md.java -f <input file> -d <output directory> | |
9 | + | // | |
10 | + | //DEPS info.picocli:picocli:4.7.7 | |
11 | + | //DEPS com.jayway.jsonpath:json-path:2.8.0 | |
12 | + | //DEPS com.fasterxml.jackson.core:jackson-databind:2.19.0 | |
13 | + | //DEPS io.github.furstenheim:copy_down:1.1 | |
14 | + | //JAVA 21+ | |
15 | + | ||
16 | + | import com.fasterxml.jackson.annotation.JsonIgnoreProperties; | |
17 | + | import com.jayway.jsonpath.Configuration; | |
18 | + | import com.jayway.jsonpath.JsonPath; | |
19 | + | import com.jayway.jsonpath.ParseContext; | |
20 | + | import com.jayway.jsonpath.TypeRef; | |
21 | + | import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider; | |
22 | + | ||
23 | + | import io.github.furstenheim.CopyDown; | |
24 | + | import picocli.CommandLine; | |
25 | + | import picocli.CommandLine.Command; | |
26 | + | import picocli.CommandLine.Option; | |
27 | + | import java.io.File; | |
28 | + | import java.io.FileWriter; | |
29 | + | import java.util.List; | |
30 | + | import java.util.concurrent.Callable; | |
31 | + | ||
32 | + | @Command(name = "ghost2md", mixinStandardHelpOptions = true, version = "0.1", description = "Converts a Ghost export file to markdown files") | |
33 | + | public class ghost2md implements Callable<Integer> { | |
34 | + | ||
35 | + | private static final ParseContext JSON = JsonPath.using(Configuration.builder().mappingProvider(new JacksonMappingProvider()).build()); | |
36 | + | private static final CopyDown HTML_TO_MD = new CopyDown(); | |
37 | + | ||
38 | + | @Option(names = { "--file", "-f" }, required = true, description = "The exported Ghost JSON file") | |
39 | + | private File exportFile; | |
40 | + | ||
41 | + | @Option(names = { "--dir", "-d" }, defaultValue = "exported", description = "The destination directory") | |
42 | + | private File destinationDir; | |
43 | + | ||
44 | + | @Override | |
45 | + | public Integer call() throws Exception { | |
46 | + | if (!exportFile.exists()) { | |
47 | + | System.err.printf("%s does not exist%n", exportFile.getAbsolutePath()); | |
48 | + | return 404; | |
49 | + | } | |
50 | + | ||
51 | + | destinationDir.mkdirs(); | |
52 | + | ||
53 | + | var json = JSON.parse(exportFile); | |
54 | + | var posts = json.read("$.db[*].data.posts[*]", new TypeRef<List<Post>>(){}); | |
55 | + | for (var post : posts) { | |
56 | + | var filename = post.slug() + ".md"; | |
57 | + | var markdown = HTML_TO_MD.convert(post.html()); | |
58 | + | var file = new File(destinationDir, filename); | |
59 | + | var title = "# " + post.title(); | |
60 | + | System.out.printf("Writing %s\n", file); | |
61 | + | try (var writer = new FileWriter(file)) { | |
62 | + | writer.write(title); | |
63 | + | writer.write("\n\n"); | |
64 | + | writer.write(markdown); | |
65 | + | } | |
66 | + | } | |
67 | + | return 0; | |
68 | + | } | |
69 | + | ||
70 | + | @JsonIgnoreProperties(ignoreUnknown = true) | |
71 | + | record Post(String slug, String title, String html){} | |
72 | + | ||
73 | + | public static void main(String[] args) { | |
74 | + | final int exitCode = new CommandLine(new ghost2md()).execute(args); | |
75 | + | System.exit(exitCode); | |
76 | + | } | |
77 | + | ||
78 | + | } |