Last active 1751784195

Converts Ghost export to Markdown

ghost2md.java Raw
1///usr/bin/env jbang "$0" "$@" ; exit $?
2//
3// This is a JBang script. You need JBang to run this script.
4//
5// To learn more go to https://www.jbang.dev and install JBang.
6//
7// Run this script:
8// ./ghost2md.java -f <input file> -d <output directory>
9//
10//DEPS info.picocli:picocli:4.7.7
11//DEPS com.jayway.jsonpath:json-path:2.8.0
12//DEPS com.fasterxml.jackson.core:jackson-databind:2.19.0
13//DEPS io.github.furstenheim:copy_down:1.1
14//JAVA 21+
15
16import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
17import com.jayway.jsonpath.Configuration;
18import com.jayway.jsonpath.JsonPath;
19import com.jayway.jsonpath.ParseContext;
20import com.jayway.jsonpath.TypeRef;
21import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
22
23import io.github.furstenheim.CopyDown;
24import picocli.CommandLine;
25import picocli.CommandLine.Command;
26import picocli.CommandLine.Option;
27
28import static java.util.Optional.ofNullable;
29
30import java.io.File;
31import java.io.FileOutputStream;
32import java.io.FileWriter;
33import java.io.IOException;
34import java.io.PrintWriter;
35import java.net.URI;
36import java.nio.file.Paths;
37import java.util.List;
38import java.util.Optional;
39import java.util.concurrent.Callable;
40import java.util.regex.Pattern;
41
42@Command(name = "ghost2md", mixinStandardHelpOptions = true, version = "0.1", description = "Converts a Ghost export file to markdown files")
43public class ghost2md implements Callable<Integer> {
44
45 private static final ParseContext JSON = JsonPath.using(Configuration.builder().mappingProvider(new JacksonMappingProvider()).build());
46 private static final CopyDown HTML_TO_MD = new CopyDown();
47
48 @Option(names = { "--file", "-f" }, required = true, description = "The exported Ghost JSON file")
49 private File exportFile;
50
51 @Option(names = { "--dir", "-d" }, defaultValue = "exported", description = "The destination directory")
52 private File destinationDir;
53
54 @Option(names = { "--type", "-t" }, defaultValue = "all", description = "Type of entries to export (all,page,post)")
55 private String type;
56
57 @Option(names = { "--domain" }, defaultValue = "", description = "The domain of Ghost blog (used to download linked resources)")
58 private String domain;
59
60
61 @Override
62 public Integer call() throws Exception {
63 if (!exportFile.exists()) {
64 System.err.printf("%s does not exist%n", exportFile.getAbsolutePath());
65 return 404;
66 }
67
68 destinationDir.mkdirs();
69
70 var json = JSON.parse(exportFile);
71 var posts = json.read("$.db[*].data.posts[*]", new TypeRef<List<Post>>(){});
72 var postMeta = json.read("$.db[*].data.posts_meta[*]", new TypeRef<List<PostMeta>>(){});
73 var tags = json.read("$.db[*].data.tags[*]", new TypeRef<List<Tag>>(){});
74 var tagLinks = json.read("$.db[*].data.posts_tags[*]", new TypeRef<List<TagLink>>(){});
75 var authors = json.read("$.db[*].data.users[*]", new TypeRef<List<Author>>(){});
76 var authorLinks = json.read("$.db[*].data.posts_authors[*]", new TypeRef<List<AuthorLink>>(){});
77
78 for (var post : posts) {
79 if (!"all".equals(type) && !post.type().equals(type)) {
80 continue;
81 }
82
83 var filename = post.slug() + ".md";
84 var markdown = HTML_TO_MD.convert(post.html());
85 var file = new File(destinationDir, filename);
86 var title = "# " + post.title();
87
88 var meta = postMeta.stream()
89 .filter(link -> link.post_id().equals(post.id()))
90 .findFirst();
91
92 var linkedTags = tagLinks.stream()
93 .filter(link -> link.post_id().equals(post.id()))
94 .flatMap(link -> tags.stream().filter(tag -> tag.id().equals(link.tag_id())))
95 .toList();
96
97 var linkedAuthors = authorLinks.stream()
98 .filter(link -> link.post_id().equals(post.id()))
99 .flatMap(link -> authors.stream().filter(author -> author.id().equals(link.author_id())))
100 .toList();
101
102 System.out.printf("Writing %s\n", file);
103 try (var writer = new PrintWriter(new FileWriter(file), true)) {
104 writer.println("---");
105
106 writer.println("date:");
107 ofNullable(post.created_at()).ifPresent(date -> writer.println(" created: " + date));
108 ofNullable(post.updated_at()).ifPresent(date -> writer.println(" updated: " + date));
109 ofNullable(post.published_at()).ifPresent(date -> writer.println(" published: " + date));
110
111 if (post.custom_excerpt() != null) {
112 writer.println("excerpt: \"" + post.custom_excerpt() + "\"");
113 } else if(meta.isPresent() && meta.get().title().isPresent()) {
114 writer.println("excerpt: \"" + meta.get().title().get() + "\"");
115 } else {
116 writer.println("excerpt: \"" + post.title() + "\"");
117 }
118
119 if (post.featured() > 0) {
120 writer.println("pin: true");
121 }
122
123 if (post.feature_image() != null) {
124 var url = post.feature_image();
125 if (url.startsWith("__GHOST_URL__")) {
126 url = downloadAsset(url.replace("__GHOST_URL__", domain));
127 }
128 writer.println("cover: " + url);
129 } else if (meta.isPresent() && meta.get().image().isPresent()) {
130 var url = meta.get().image().get();
131 if (url.startsWith("__GHOST_URL__")) {
132 url = downloadAsset(url.replace("__GHOST_URL__", domain));
133 }
134 writer.println("cover: " + url);
135 } else if(!linkedTags.isEmpty()){
136 var url = linkedTags.stream()
137 .filter(tag -> tag.feature_image() != null)
138 .map(tag -> tag.feature_image())
139 .findFirst()
140 .get();
141 if (url.startsWith("__GHOST_URL__")) {
142 url = downloadAsset(url.replace("__GHOST_URL__", domain));
143 }
144 writer.println("cover: " + url);
145
146 }
147
148 if (post.status.equals("draft")) {
149 writer.println("draft: true");
150 }
151
152 if (!linkedTags.isEmpty() || post.featured() > 0) {
153
154 writer.println("tags:");
155
156 for (var tag : linkedTags) {
157 writer.println(" - slug: " + tag.slug());
158 writer.println(" title: \"" + tag.name() + "\"");
159 }
160 }
161
162 if (!linkedAuthors.isEmpty()) {
163
164 writer.println("authors:");
165
166 for (var author : linkedAuthors) {
167 writer.println(" - slug: " + author.slug());
168 writer.println(" name: \"" + author.name() + "\"");
169 }
170 }
171
172 writer.println("---");
173 writer.println();
174
175 writer.println(title);
176 writer.println();
177
178 var internalLinkPattern = Pattern.compile("\\(__GHOST_URL__(.*)(#(.*))?\\)");
179 var internalLinks = internalLinkPattern.matcher(markdown);
180 markdown = internalLinks.replaceAll(result -> {
181 var link = result.group(1);
182 var anchor = result.group(2);
183 if (link.startsWith("/content") || link.startsWith("/api")) {
184 return "(" + downloadAsset(domain + link) + ")";
185 }
186 if (link.startsWith("/")) {
187 link = link.substring(1);
188 }
189 if (link.endsWith("/")) {
190 link = link.substring(0, link.length() -1);
191 }
192 if (anchor != null) {
193 return "(" + link + ".md" + "#" +anchor + ")";
194 }
195
196 System.out.println(link);
197 return "(" + link + ".md)";
198 });
199
200 if(markdown.contains("__GHOST_URL__")) {
201 System.out.println("Failed to replace all links in markdown in " + filename);
202 System.out.println(markdown);
203 }
204
205 writer.println(markdown);
206 }
207 }
208 return 0;
209 }
210
211 @JsonIgnoreProperties(ignoreUnknown = true)
212 record Post(String id, String slug, String title, String html, String created_at, String updated_at, String published_at, String type, int featured, String feature_image, String status, String custom_excerpt){}
213
214 @JsonIgnoreProperties(ignoreUnknown = true)
215 record PostMeta(String post_id, String meta_title, String meta_description, String twitter_image, String og_image){
216
217 public Optional<String> title() {
218 return Optional.ofNullable(meta_title).or(() -> Optional.ofNullable(meta_description));
219 }
220
221 public Optional<String> image() {
222 return Optional.ofNullable(og_image).or(() -> Optional.ofNullable(twitter_image));
223 }
224 }
225
226 @JsonIgnoreProperties(ignoreUnknown = true)
227 record Tag(String id, String slug, String name, String description, String feature_image){}
228
229 @JsonIgnoreProperties(ignoreUnknown = true)
230 record TagLink(String post_id, String tag_id){}
231
232 @JsonIgnoreProperties(ignoreUnknown = true)
233 record Author(String id, String slug, String name){}
234
235 @JsonIgnoreProperties(ignoreUnknown = true)
236 record AuthorLink(String post_id, String author_id){}
237
238 public static void main(String[] args) {
239 final int exitCode = new CommandLine(new ghost2md()).execute(args);
240 System.exit(exitCode);
241 }
242
243 private String downloadAsset(String uriString) {
244 var assetDir = new File(destinationDir, "assets");
245 if (!assetDir.exists()) {
246 assetDir.mkdirs();
247 }
248
249 var uri = URI.create(uriString);
250 var file = new File(assetDir, Paths.get(uri.getPath()).getFileName().toString());
251 if (file.exists()) {
252 return "assets/" + file.getName();
253 }
254
255 System.out.println("Downloading " + uriString);
256 try (var out = new FileOutputStream(file)) {
257 var conn = uri.toURL().openConnection();
258 try (var stream = conn.getInputStream()) {
259 stream.transferTo(out);
260 }
261 } catch (IOException e) {
262 System.err.println("Failed to download file " + uriString + ". " + e.getMessage());
263 return uriString;
264 }
265
266 return "assets/" + file.getName();
267 }
268
269}