Last active 1751784195

Converts Ghost export to Markdown

john's Avatar john revised this gist 1751784194. Go to revision

1 file changed, 196 insertions, 5 deletions

ghost2md.java

@@ -24,10 +24,20 @@ import io.github.furstenheim.CopyDown;
24 24 import picocli.CommandLine;
25 25 import picocli.CommandLine.Command;
26 26 import picocli.CommandLine.Option;
27 +
28 + import static java.util.Optional.ofNullable;
29 +
27 30 import java.io.File;
31 + import java.io.FileOutputStream;
28 32 import java.io.FileWriter;
33 + import java.io.IOException;
34 + import java.io.PrintWriter;
35 + import java.net.URI;
36 + import java.nio.file.Paths;
29 37 import java.util.List;
38 + import java.util.Optional;
30 39 import java.util.concurrent.Callable;
40 + import java.util.regex.Pattern;
31 41
32 42 @Command(name = "ghost2md", mixinStandardHelpOptions = true, version = "0.1", description = "Converts a Ghost export file to markdown files")
33 43 public class ghost2md implements Callable<Integer> {
@@ -41,6 +51,13 @@ public class ghost2md implements Callable<Integer> {
41 51 @Option(names = { "--dir", "-d" }, defaultValue = "exported", description = "The destination directory")
42 52 private File destinationDir;
43 53
54 + @Option(names = { "--type", "-t" }, defaultValue = "all", description = "Type of entries to export (all,page,post)")
55 + private String type;
56 +
57 + @Option(names = { "--domain" }, defaultValue = "", description = "The domain of Ghost blog (used to download linked resources)")
58 + private String domain;
59 +
60 +
44 61 @Override
45 62 public Integer call() throws Exception {
46 63 if (!exportFile.exists()) {
@@ -52,27 +69,201 @@ public class ghost2md implements Callable<Integer> {
52 69
53 70 var json = JSON.parse(exportFile);
54 71 var posts = json.read("$.db[*].data.posts[*]", new TypeRef<List<Post>>(){});
72 + var postMeta = json.read("$.db[*].data.posts_meta[*]", new TypeRef<List<PostMeta>>(){});
73 + var tags = json.read("$.db[*].data.tags[*]", new TypeRef<List<Tag>>(){});
74 + var tagLinks = json.read("$.db[*].data.posts_tags[*]", new TypeRef<List<TagLink>>(){});
75 + var authors = json.read("$.db[*].data.users[*]", new TypeRef<List<Author>>(){});
76 + var authorLinks = json.read("$.db[*].data.posts_authors[*]", new TypeRef<List<AuthorLink>>(){});
77 +
55 78 for (var post : posts) {
79 + if (!"all".equals(type) && !post.type().equals(type)) {
80 + continue;
81 + }
82 +
56 83 var filename = post.slug() + ".md";
57 84 var markdown = HTML_TO_MD.convert(post.html());
58 85 var file = new File(destinationDir, filename);
59 86 var title = "# " + post.title();
87 +
88 + var meta = postMeta.stream()
89 + .filter(link -> link.post_id().equals(post.id()))
90 + .findFirst();
91 +
92 + var linkedTags = tagLinks.stream()
93 + .filter(link -> link.post_id().equals(post.id()))
94 + .flatMap(link -> tags.stream().filter(tag -> tag.id().equals(link.tag_id())))
95 + .toList();
96 +
97 + var linkedAuthors = authorLinks.stream()
98 + .filter(link -> link.post_id().equals(post.id()))
99 + .flatMap(link -> authors.stream().filter(author -> author.id().equals(link.author_id())))
100 + .toList();
101 +
60 102 System.out.printf("Writing %s\n", file);
61 - try (var writer = new FileWriter(file)) {
62 - writer.write(title);
63 - writer.write("\n\n");
64 - writer.write(markdown);
103 + try (var writer = new PrintWriter(new FileWriter(file), true)) {
104 + writer.println("---");
105 +
106 + writer.println("date:");
107 + ofNullable(post.created_at()).ifPresent(date -> writer.println(" created: " + date));
108 + ofNullable(post.updated_at()).ifPresent(date -> writer.println(" updated: " + date));
109 + ofNullable(post.published_at()).ifPresent(date -> writer.println(" published: " + date));
110 +
111 + if (post.custom_excerpt() != null) {
112 + writer.println("excerpt: \"" + post.custom_excerpt() + "\"");
113 + } else if(meta.isPresent() && meta.get().title().isPresent()) {
114 + writer.println("excerpt: \"" + meta.get().title().get() + "\"");
115 + } else {
116 + writer.println("excerpt: \"" + post.title() + "\"");
117 + }
118 +
119 + if (post.featured() > 0) {
120 + writer.println("pin: true");
121 + }
122 +
123 + if (post.feature_image() != null) {
124 + var url = post.feature_image();
125 + if (url.startsWith("__GHOST_URL__")) {
126 + url = downloadAsset(url.replace("__GHOST_URL__", domain));
127 + }
128 + writer.println("cover: " + url);
129 + } else if (meta.isPresent() && meta.get().image().isPresent()) {
130 + var url = meta.get().image().get();
131 + if (url.startsWith("__GHOST_URL__")) {
132 + url = downloadAsset(url.replace("__GHOST_URL__", domain));
133 + }
134 + writer.println("cover: " + url);
135 + } else if(!linkedTags.isEmpty()){
136 + var url = linkedTags.stream()
137 + .filter(tag -> tag.feature_image() != null)
138 + .map(tag -> tag.feature_image())
139 + .findFirst()
140 + .get();
141 + if (url.startsWith("__GHOST_URL__")) {
142 + url = downloadAsset(url.replace("__GHOST_URL__", domain));
143 + }
144 + writer.println("cover: " + url);
145 +
146 + }
147 +
148 + if (post.status.equals("draft")) {
149 + writer.println("draft: true");
150 + }
151 +
152 + if (!linkedTags.isEmpty() || post.featured() > 0) {
153 +
154 + writer.println("tags:");
155 +
156 + for (var tag : linkedTags) {
157 + writer.println(" - slug: " + tag.slug());
158 + writer.println(" title: \"" + tag.name() + "\"");
159 + }
160 + }
161 +
162 + if (!linkedAuthors.isEmpty()) {
163 +
164 + writer.println("authors:");
165 +
166 + for (var author : linkedAuthors) {
167 + writer.println(" - slug: " + author.slug());
168 + writer.println(" name: \"" + author.name() + "\"");
169 + }
170 + }
171 +
172 + writer.println("---");
173 + writer.println();
174 +
175 + writer.println(title);
176 + writer.println();
177 +
178 + var internalLinkPattern = Pattern.compile("\\(__GHOST_URL__(.*)(#(.*))?\\)");
179 + var internalLinks = internalLinkPattern.matcher(markdown);
180 + markdown = internalLinks.replaceAll(result -> {
181 + var link = result.group(1);
182 + var anchor = result.group(2);
183 + if (link.startsWith("/content") || link.startsWith("/api")) {
184 + return "(" + downloadAsset(domain + link) + ")";
185 + }
186 + if (link.startsWith("/")) {
187 + link = link.substring(1);
188 + }
189 + if (link.endsWith("/")) {
190 + link = link.substring(0, link.length() -1);
191 + }
192 + if (anchor != null) {
193 + return "(" + link + ".md" + "#" +anchor + ")";
194 + }
195 +
196 + System.out.println(link);
197 + return "(" + link + ".md)";
198 + });
199 +
200 + if(markdown.contains("__GHOST_URL__")) {
201 + System.out.println("Failed to replace all links in markdown in " + filename);
202 + System.out.println(markdown);
203 + }
204 +
205 + writer.println(markdown);
65 206 }
66 207 }
67 208 return 0;
68 209 }
69 210
70 211 @JsonIgnoreProperties(ignoreUnknown = true)
71 - record Post(String slug, String title, String html){}
212 + record Post(String id, String slug, String title, String html, String created_at, String updated_at, String published_at, String type, int featured, String feature_image, String status, String custom_excerpt){}
213 +
214 + @JsonIgnoreProperties(ignoreUnknown = true)
215 + record PostMeta(String post_id, String meta_title, String meta_description, String twitter_image, String og_image){
216 +
217 + public Optional<String> title() {
218 + return Optional.ofNullable(meta_title).or(() -> Optional.ofNullable(meta_description));
219 + }
220 +
221 + public Optional<String> image() {
222 + return Optional.ofNullable(og_image).or(() -> Optional.ofNullable(twitter_image));
223 + }
224 + }
225 +
226 + @JsonIgnoreProperties(ignoreUnknown = true)
227 + record Tag(String id, String slug, String name, String description, String feature_image){}
228 +
229 + @JsonIgnoreProperties(ignoreUnknown = true)
230 + record TagLink(String post_id, String tag_id){}
231 +
232 + @JsonIgnoreProperties(ignoreUnknown = true)
233 + record Author(String id, String slug, String name){}
234 +
235 + @JsonIgnoreProperties(ignoreUnknown = true)
236 + record AuthorLink(String post_id, String author_id){}
72 237
73 238 public static void main(String[] args) {
74 239 final int exitCode = new CommandLine(new ghost2md()).execute(args);
75 240 System.exit(exitCode);
76 241 }
77 242
243 + private String downloadAsset(String uriString) {
244 + var assetDir = new File(destinationDir, "assets");
245 + if (!assetDir.exists()) {
246 + assetDir.mkdirs();
247 + }
248 +
249 + var uri = URI.create(uriString);
250 + var file = new File(assetDir, Paths.get(uri.getPath()).getFileName().toString());
251 + if (file.exists()) {
252 + return "assets/" + file.getName();
253 + }
254 +
255 + System.out.println("Downloading " + uriString);
256 + try (var out = new FileOutputStream(file)) {
257 + var conn = uri.toURL().openConnection();
258 + try (var stream = conn.getInputStream()) {
259 + stream.transferTo(out);
260 + }
261 + } catch (IOException e) {
262 + System.err.println("Failed to download file " + uriString + ". " + e.getMessage());
263 + return uriString;
264 + }
265 +
266 + return "assets/" + file.getName();
267 + }
268 +
78 269 }

john's Avatar john revised this gist 1751277538. Go to revision

1 file changed, 78 insertions

ghost2md.java(file created)

@@ -0,0 +1,78 @@
1 + ///usr/bin/env jbang "$0" "$@" ; exit $?
2 + //
3 + // This is a JBang script. You need JBang to run this script.
4 + //
5 + // To learn more go to https://www.jbang.dev and install JBang.
6 + //
7 + // Run this script:
8 + // ./ghost2md.java -f <input file> -d <output directory>
9 + //
10 + //DEPS info.picocli:picocli:4.7.7
11 + //DEPS com.jayway.jsonpath:json-path:2.8.0
12 + //DEPS com.fasterxml.jackson.core:jackson-databind:2.19.0
13 + //DEPS io.github.furstenheim:copy_down:1.1
14 + //JAVA 21+
15 +
16 + import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
17 + import com.jayway.jsonpath.Configuration;
18 + import com.jayway.jsonpath.JsonPath;
19 + import com.jayway.jsonpath.ParseContext;
20 + import com.jayway.jsonpath.TypeRef;
21 + import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
22 +
23 + import io.github.furstenheim.CopyDown;
24 + import picocli.CommandLine;
25 + import picocli.CommandLine.Command;
26 + import picocli.CommandLine.Option;
27 + import java.io.File;
28 + import java.io.FileWriter;
29 + import java.util.List;
30 + import java.util.concurrent.Callable;
31 +
32 + @Command(name = "ghost2md", mixinStandardHelpOptions = true, version = "0.1", description = "Converts a Ghost export file to markdown files")
33 + public class ghost2md implements Callable<Integer> {
34 +
35 + private static final ParseContext JSON = JsonPath.using(Configuration.builder().mappingProvider(new JacksonMappingProvider()).build());
36 + private static final CopyDown HTML_TO_MD = new CopyDown();
37 +
38 + @Option(names = { "--file", "-f" }, required = true, description = "The exported Ghost JSON file")
39 + private File exportFile;
40 +
41 + @Option(names = { "--dir", "-d" }, defaultValue = "exported", description = "The destination directory")
42 + private File destinationDir;
43 +
44 + @Override
45 + public Integer call() throws Exception {
46 + if (!exportFile.exists()) {
47 + System.err.printf("%s does not exist%n", exportFile.getAbsolutePath());
48 + return 404;
49 + }
50 +
51 + destinationDir.mkdirs();
52 +
53 + var json = JSON.parse(exportFile);
54 + var posts = json.read("$.db[*].data.posts[*]", new TypeRef<List<Post>>(){});
55 + for (var post : posts) {
56 + var filename = post.slug() + ".md";
57 + var markdown = HTML_TO_MD.convert(post.html());
58 + var file = new File(destinationDir, filename);
59 + var title = "# " + post.title();
60 + System.out.printf("Writing %s\n", file);
61 + try (var writer = new FileWriter(file)) {
62 + writer.write(title);
63 + writer.write("\n\n");
64 + writer.write(markdown);
65 + }
66 + }
67 + return 0;
68 + }
69 +
70 + @JsonIgnoreProperties(ignoreUnknown = true)
71 + record Post(String slug, String title, String html){}
72 +
73 + public static void main(String[] args) {
74 + final int exitCode = new CommandLine(new ghost2md()).execute(args);
75 + System.exit(exitCode);
76 + }
77 +
78 + }
Newer Older