ghost2md.java
· 11 KiB · Java
Raw
///usr/bin/env jbang "$0" "$@" ; exit $?
//
// This is a JBang script. You need JBang to run this script.
//
// To learn more go to https://www.jbang.dev and install JBang.
//
// Run this script:
// ./ghost2md.java -f <input file> -d <output directory>
//
//DEPS info.picocli:picocli:4.7.7
//DEPS com.jayway.jsonpath:json-path:2.8.0
//DEPS com.fasterxml.jackson.core:jackson-databind:2.19.0
//DEPS io.github.furstenheim:copy_down:1.1
//JAVA 21+
import static java.util.Optional.ofNullable;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.ParseContext;
import com.jayway.jsonpath.TypeRef;
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
import io.github.furstenheim.CopyDown;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.Callable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
/**
 * Command-line tool that reads a Ghost JSON export and writes one Markdown file per
 * exported post into the destination directory.
 *
 * <p>Each file starts with a front-matter block delimited by {@code ---} (dates, excerpt,
 * pin flag, cover, draft flag, tags, authors), followed by the title as a level-one heading
 * and the post body converted from HTML to Markdown. Links that start with the
 * {@code __GHOST_URL__} placeholder are either downloaded into an {@code assets}
 * sub-directory or rewritten to point at the sibling {@code .md} file.
 */
@Command(name = "ghost2md", mixinStandardHelpOptions = true, version = "0.1", description = "Converts a Ghost export file to markdown files")
public class ghost2md implements Callable<Integer> {

    /** Placeholder prefix that this script replaces with {@code --domain} or strips from links. */
    private static final String GHOST_URL = "__GHOST_URL__";

    private static final ParseContext JSON = JsonPath.using(Configuration.builder().mappingProvider(new JacksonMappingProvider()).build());
    private static final CopyDown HTML_TO_MD = new CopyDown();

    /**
     * Matches one Markdown link target of the form {@code (__GHOST_URL__<path>[#<anchor>][ <title>])}.
     * Group 1 is the path, group 2 the anchor without {@code #}, group 3 an optional trailing
     * part (whitespace followed by e.g. a quoted link title). The character classes exclude
     * {@code )} so that two links on the same line are matched separately.
     */
    private static final Pattern INTERNAL_LINK =
            Pattern.compile("\\(__GHOST_URL__([^)#\\s]*)(?:#([^)\\s]*))?(\\s[^)]*)?\\)");

    @Option(names = { "--file", "-f" }, required = true, description = "The exported Ghost JSON file")
    private File exportFile;

    @Option(names = { "--dir", "-d" }, defaultValue = "exported", description = "The destination directory")
    private File destinationDir;

    @Option(names = { "--type", "-t" }, defaultValue = "all", description = "Type of entries to export (all,page,post)")
    private String type;

    @Option(names = { "--domain" }, defaultValue = "", description = "The domain of Ghost blog (used to download linked resources)")
    private String domain;

    /**
     * Runs the conversion.
     *
     * @return {@code 404} when the export file does not exist, {@code 0} otherwise
     * @throws Exception if the export cannot be parsed or an output file cannot be written
     */
    @Override
    public Integer call() throws Exception {
        if (!exportFile.exists()) {
            System.err.printf("%s does not exist%n", exportFile.getAbsolutePath());
            return 404;
        }

        destinationDir.mkdirs();

        var json = JSON.parse(exportFile);
        var posts = json.read("$.db[*].data.posts[*]", new TypeRef<List<Post>>(){});
        var postMeta = json.read("$.db[*].data.posts_meta[*]", new TypeRef<List<PostMeta>>(){});
        var tags = json.read("$.db[*].data.tags[*]", new TypeRef<List<Tag>>(){});
        var tagLinks = json.read("$.db[*].data.posts_tags[*]", new TypeRef<List<TagLink>>(){});
        var authors = json.read("$.db[*].data.users[*]", new TypeRef<List<Author>>(){});
        var authorLinks = json.read("$.db[*].data.posts_authors[*]", new TypeRef<List<AuthorLink>>(){});

        for (var post : posts) {
            // Compare from the option side: it always has a value, the post's type may not.
            if (!"all".equals(type) && !type.equals(post.type())) {
                continue;
            }

            var filename = post.slug() + ".md";
            // NOTE(review): assumes an export may contain posts without html - confirm.
            // Such posts get an empty body instead of handing null to the converter.
            var html = post.html();
            var markdown = html == null ? "" : HTML_TO_MD.convert(html);
            var file = new File(destinationDir, filename);
            var title = "# " + post.title();

            var meta = postMeta.stream()
                    .filter(link -> link.post_id().equals(post.id()))
                    .findFirst();

            var linkedTags = tagLinks.stream()
                    .filter(link -> link.post_id().equals(post.id()))
                    .flatMap(link -> tags.stream().filter(tag -> tag.id().equals(link.tag_id())))
                    .toList();

            var linkedAuthors = authorLinks.stream()
                    .filter(link -> link.post_id().equals(post.id()))
                    .flatMap(link -> authors.stream().filter(author -> author.id().equals(link.author_id())))
                    .toList();

            System.out.printf("Writing %s\n", file);
            try (var writer = new PrintWriter(new FileWriter(file, StandardCharsets.UTF_8), true)) {
                writer.println("---");

                writer.println("date:");
                ofNullable(post.created_at()).ifPresent(date -> writer.println(" created: " + date));
                ofNullable(post.updated_at()).ifPresent(date -> writer.println(" updated: " + date));
                ofNullable(post.published_at()).ifPresent(date -> writer.println(" published: " + date));

                // Excerpt priority: custom excerpt, then meta title/description, then the post title.
                var excerpt = ofNullable(post.custom_excerpt())
                        .or(() -> meta.flatMap(PostMeta::title))
                        .orElseGet(post::title);
                writer.println("excerpt: " + yamlQuote(excerpt));

                if (post.featured() > 0) {
                    writer.println("pin: true");
                }

                // Cover priority: post image, then meta image, then the first linked tag that
                // has an image. No cover line is written when none of them provides one.
                var cover = ofNullable(post.feature_image())
                        .or(() -> meta.flatMap(PostMeta::image))
                        .or(() -> linkedTags.stream()
                                .map(Tag::feature_image)
                                .filter(image -> image != null)
                                .findFirst());
                cover.map(this::resolveAsset).ifPresent(url -> writer.println("cover: " + url));

                if ("draft".equals(post.status())) {
                    writer.println("draft: true");
                }

                // NOTE(review): a featured post without tags still gets a bare "tags:" key;
                // kept as in the original - confirm whether that is intended.
                if (!linkedTags.isEmpty() || post.featured() > 0) {
                    writer.println("tags:");
                    for (var tag : linkedTags) {
                        writer.println(" - slug: " + tag.slug());
                        // Three spaces so the key lines up with "slug" inside the same list item.
                        writer.println("   title: " + yamlQuote(tag.name()));
                    }
                }

                if (!linkedAuthors.isEmpty()) {
                    writer.println("authors:");
                    for (var author : linkedAuthors) {
                        writer.println(" - slug: " + author.slug());
                        writer.println("   name: " + yamlQuote(author.name()));
                    }
                }

                writer.println("---");
                writer.println();

                writer.println(title);
                writer.println();

                markdown = rewriteInternalLinks(markdown);

                if (markdown.contains(GHOST_URL)) {
                    System.out.println("Failed to replace all links in markdown in " + filename);
                    System.out.println(markdown);
                }

                writer.println(markdown);
            }
        }
        return 0;
    }

    /**
     * Rewrites every {@code (__GHOST_URL__...)} link target in the given Markdown.
     *
     * <p>Paths starting with {@code /content} or {@code /api} are passed to
     * {@link #downloadAsset(String)}; all other paths lose their leading and trailing slash
     * and get a {@code .md} suffix. An anchor and a trailing link title, if present, are kept.
     *
     * @param markdown the converted post body
     * @return the body with all matched link targets replaced
     */
    private String rewriteInternalLinks(String markdown) {
        return INTERNAL_LINK.matcher(markdown).replaceAll(match -> {
            var link = match.group(1);
            var anchor = match.group(2);
            var fragment = anchor == null || anchor.isEmpty() ? "" : "#" + anchor;
            var trailer = match.group(3) == null ? "" : match.group(3);

            String target;
            if (link.startsWith("/content") || link.startsWith("/api")) {
                target = downloadAsset(domain + link) + fragment;
            } else {
                if (link.startsWith("/")) {
                    link = link.substring(1);
                }
                if (link.endsWith("/")) {
                    link = link.substring(0, link.length() - 1);
                }
                if (fragment.isEmpty()) {
                    System.out.println(link);
                }
                target = link + ".md" + fragment;
            }
            // The replacement is interpreted for "$" and "\" unless it is quoted.
            return Matcher.quoteReplacement("(" + target + trailer + ")");
        });
    }

    /**
     * Downloads the URL when it starts with the Ghost placeholder (after substituting
     * {@code domain}); any other URL is returned unchanged.
     */
    private String resolveAsset(String url) {
        return url.startsWith(GHOST_URL) ? downloadAsset(url.replace(GHOST_URL, domain)) : url;
    }

    /**
     * Wraps a value in double quotes for the front matter, escaping backslashes, double
     * quotes and line breaks so the value stays on one line. A {@code null} value is
     * written as the text {@code null}, as plain string concatenation would do.
     */
    private static String yamlQuote(String value) {
        var escaped = String.valueOf(value)
                .replace("\\", "\\\\")
                .replace("\"", "\\\"")
                .replace("\r", "")
                .replace("\n", "\\n");
        return "\"" + escaped + "\"";
    }

    @JsonIgnoreProperties(ignoreUnknown = true)
    record Post(String id, String slug, String title, String html, String created_at, String updated_at, String published_at, String type, int featured, String feature_image, String status, String custom_excerpt){}

    @JsonIgnoreProperties(ignoreUnknown = true)
    record PostMeta(String post_id, String meta_title, String meta_description, String twitter_image, String og_image){

        /** Returns the meta title, or the meta description when there is no title. */
        public Optional<String> title() {
            return Optional.ofNullable(meta_title).or(() -> Optional.ofNullable(meta_description));
        }

        /** Returns the Open Graph image, or the Twitter image when there is none. */
        public Optional<String> image() {
            return Optional.ofNullable(og_image).or(() -> Optional.ofNullable(twitter_image));
        }
    }

    @JsonIgnoreProperties(ignoreUnknown = true)
    record Tag(String id, String slug, String name, String description, String feature_image){}

    @JsonIgnoreProperties(ignoreUnknown = true)
    record TagLink(String post_id, String tag_id){}

    @JsonIgnoreProperties(ignoreUnknown = true)
    record Author(String id, String slug, String name){}

    @JsonIgnoreProperties(ignoreUnknown = true)
    record AuthorLink(String post_id, String author_id){}

    public static void main(String[] args) {
        final int exitCode = new CommandLine(new ghost2md()).execute(args);
        System.exit(exitCode);
    }

    /**
     * Downloads a resource into {@code <destinationDir>/assets}, reusing a file of the same
     * name if it is already there.
     *
     * @param uriString the URL to download
     * @return {@code assets/<file name>} on success or when the file already exists;
     *         {@code uriString} unchanged when the URL is unusable or the download fails
     */
    private String downloadAsset(String uriString) {
        var assetDir = new File(destinationDir, "assets");
        if (!assetDir.exists()) {
            assetDir.mkdirs();
        }

        try {
            var uri = URI.create(uriString);
            var path = uri.getPath();
            var name = path == null ? null : Paths.get(path).getFileName();
            if (name == null || name.toString().isEmpty()) {
                System.err.println("Failed to download file " + uriString + ". No file name in URL");
                return uriString;
            }

            var file = new File(assetDir, name.toString());
            if (file.exists()) {
                return "assets/" + file.getName();
            }

            System.out.println("Downloading " + uriString);
            var complete = false;
            // Open the connection first so a failed request does not create the target file.
            try (var stream = uri.toURL().openConnection().getInputStream();
                 var out = new FileOutputStream(file)) {
                stream.transferTo(out);
                complete = true;
            } finally {
                // Remove a partial file; otherwise the exists() check above would treat it
                // as a finished download on the next call.
                if (!complete) {
                    file.delete();
                }
            }
            return "assets/" + file.getName();
        } catch (IOException | IllegalArgumentException e) {
            // IllegalArgumentException covers malformed URIs and non-absolute ones
            // (e.g. when --domain was left empty).
            System.err.println("Failed to download file " + uriString + ". " + e.getMessage());
            return uriString;
        }
    }
}
1 | ///usr/bin/env jbang "$0" "$@" ; exit $? |
2 | // |
3 | // This is a JBang script. You need JBang to run this script. |
4 | // |
5 | // To learn more go to https://www.jbang.dev and install JBang. |
6 | // |
7 | // Run this script: |
8 | // ./ghost2md.java -f <input file> -d <output directory> |
9 | // |
10 | //DEPS info.picocli:picocli:4.7.7 |
11 | //DEPS com.jayway.jsonpath:json-path:2.8.0 |
12 | //DEPS com.fasterxml.jackson.core:jackson-databind:2.19.0 |
13 | //DEPS io.github.furstenheim:copy_down:1.1 |
14 | //JAVA 21+ |
15 | |
16 | import com.fasterxml.jackson.annotation.JsonIgnoreProperties; |
17 | import com.jayway.jsonpath.Configuration; |
18 | import com.jayway.jsonpath.JsonPath; |
19 | import com.jayway.jsonpath.ParseContext; |
20 | import com.jayway.jsonpath.TypeRef; |
21 | import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider; |
22 | |
23 | import io.github.furstenheim.CopyDown; |
24 | import picocli.CommandLine; |
25 | import picocli.CommandLine.Command; |
26 | import picocli.CommandLine.Option; |
27 | |
28 | import static java.util.Optional.ofNullable; |
29 | |
30 | import java.io.File; |
31 | import java.io.FileOutputStream; |
32 | import java.io.FileWriter; |
33 | import java.io.IOException; |
34 | import java.io.PrintWriter; |
35 | import java.net.URI; |
36 | import java.nio.file.Paths; |
37 | import java.util.List; |
38 | import java.util.Optional; |
39 | import java.util.concurrent.Callable; |
40 | import java.util.regex.Pattern; |
41 | |
42 | @Command(name = "ghost2md", mixinStandardHelpOptions = true, version = "0.1", description = "Converts a Ghost export file to markdown files") |
43 | public class ghost2md implements Callable<Integer> { |
44 | |
45 | private static final ParseContext JSON = JsonPath.using(Configuration.builder().mappingProvider(new JacksonMappingProvider()).build()); |
46 | private static final CopyDown HTML_TO_MD = new CopyDown(); |
47 | |
48 | @Option(names = { "--file", "-f" }, required = true, description = "The exported Ghost JSON file") |
49 | private File exportFile; |
50 | |
51 | @Option(names = { "--dir", "-d" }, defaultValue = "exported", description = "The destination directory") |
52 | private File destinationDir; |
53 | |
54 | @Option(names = { "--type", "-t" }, defaultValue = "all", description = "Type of entries to export (all,page,post)") |
55 | private String type; |
56 | |
57 | @Option(names = { "--domain" }, defaultValue = "", description = "The domain of Ghost blog (used to download linked resources)") |
58 | private String domain; |
59 | |
60 | |
61 | @Override |
62 | public Integer call() throws Exception { |
63 | if (!exportFile.exists()) { |
64 | System.err.printf("%s does not exist%n", exportFile.getAbsolutePath()); |
65 | return 404; |
66 | } |
67 | |
68 | destinationDir.mkdirs(); |
69 | |
70 | var json = JSON.parse(exportFile); |
71 | var posts = json.read("$.db[*].data.posts[*]", new TypeRef<List<Post>>(){}); |
72 | var postMeta = json.read("$.db[*].data.posts_meta[*]", new TypeRef<List<PostMeta>>(){}); |
73 | var tags = json.read("$.db[*].data.tags[*]", new TypeRef<List<Tag>>(){}); |
74 | var tagLinks = json.read("$.db[*].data.posts_tags[*]", new TypeRef<List<TagLink>>(){}); |
75 | var authors = json.read("$.db[*].data.users[*]", new TypeRef<List<Author>>(){}); |
76 | var authorLinks = json.read("$.db[*].data.posts_authors[*]", new TypeRef<List<AuthorLink>>(){}); |
77 | |
78 | for (var post : posts) { |
79 | if (!"all".equals(type) && !post.type().equals(type)) { |
80 | continue; |
81 | } |
82 | |
83 | var filename = post.slug() + ".md"; |
84 | var markdown = HTML_TO_MD.convert(post.html()); |
85 | var file = new File(destinationDir, filename); |
86 | var title = "# " + post.title(); |
87 | |
88 | var meta = postMeta.stream() |
89 | .filter(link -> link.post_id().equals(post.id())) |
90 | .findFirst(); |
91 | |
92 | var linkedTags = tagLinks.stream() |
93 | .filter(link -> link.post_id().equals(post.id())) |
94 | .flatMap(link -> tags.stream().filter(tag -> tag.id().equals(link.tag_id()))) |
95 | .toList(); |
96 | |
97 | var linkedAuthors = authorLinks.stream() |
98 | .filter(link -> link.post_id().equals(post.id())) |
99 | .flatMap(link -> authors.stream().filter(author -> author.id().equals(link.author_id()))) |
100 | .toList(); |
101 | |
102 | System.out.printf("Writing %s\n", file); |
103 | try (var writer = new PrintWriter(new FileWriter(file), true)) { |
104 | writer.println("---"); |
105 | |
106 | writer.println("date:"); |
107 | ofNullable(post.created_at()).ifPresent(date -> writer.println(" created: " + date)); |
108 | ofNullable(post.updated_at()).ifPresent(date -> writer.println(" updated: " + date)); |
109 | ofNullable(post.published_at()).ifPresent(date -> writer.println(" published: " + date)); |
110 | |
111 | if (post.custom_excerpt() != null) { |
112 | writer.println("excerpt: \"" + post.custom_excerpt() + "\""); |
113 | } else if(meta.isPresent() && meta.get().title().isPresent()) { |
114 | writer.println("excerpt: \"" + meta.get().title().get() + "\""); |
115 | } else { |
116 | writer.println("excerpt: \"" + post.title() + "\""); |
117 | } |
118 | |
119 | if (post.featured() > 0) { |
120 | writer.println("pin: true"); |
121 | } |
122 | |
123 | if (post.feature_image() != null) { |
124 | var url = post.feature_image(); |
125 | if (url.startsWith("__GHOST_URL__")) { |
126 | url = downloadAsset(url.replace("__GHOST_URL__", domain)); |
127 | } |
128 | writer.println("cover: " + url); |
129 | } else if (meta.isPresent() && meta.get().image().isPresent()) { |
130 | var url = meta.get().image().get(); |
131 | if (url.startsWith("__GHOST_URL__")) { |
132 | url = downloadAsset(url.replace("__GHOST_URL__", domain)); |
133 | } |
134 | writer.println("cover: " + url); |
135 | } else if(!linkedTags.isEmpty()){ |
136 | var url = linkedTags.stream() |
137 | .filter(tag -> tag.feature_image() != null) |
138 | .map(tag -> tag.feature_image()) |
139 | .findFirst() |
140 | .get(); |
141 | if (url.startsWith("__GHOST_URL__")) { |
142 | url = downloadAsset(url.replace("__GHOST_URL__", domain)); |
143 | } |
144 | writer.println("cover: " + url); |
145 | |
146 | } |
147 | |
148 | if (post.status.equals("draft")) { |
149 | writer.println("draft: true"); |
150 | } |
151 | |
152 | if (!linkedTags.isEmpty() || post.featured() > 0) { |
153 | |
154 | writer.println("tags:"); |
155 | |
156 | for (var tag : linkedTags) { |
157 | writer.println(" - slug: " + tag.slug()); |
158 | writer.println(" title: \"" + tag.name() + "\""); |
159 | } |
160 | } |
161 | |
162 | if (!linkedAuthors.isEmpty()) { |
163 | |
164 | writer.println("authors:"); |
165 | |
166 | for (var author : linkedAuthors) { |
167 | writer.println(" - slug: " + author.slug()); |
168 | writer.println(" name: \"" + author.name() + "\""); |
169 | } |
170 | } |
171 | |
172 | writer.println("---"); |
173 | writer.println(); |
174 | |
175 | writer.println(title); |
176 | writer.println(); |
177 | |
178 | var internalLinkPattern = Pattern.compile("\\(__GHOST_URL__(.*)(#(.*))?\\)"); |
179 | var internalLinks = internalLinkPattern.matcher(markdown); |
180 | markdown = internalLinks.replaceAll(result -> { |
181 | var link = result.group(1); |
182 | var anchor = result.group(2); |
183 | if (link.startsWith("/content") || link.startsWith("/api")) { |
184 | return "(" + downloadAsset(domain + link) + ")"; |
185 | } |
186 | if (link.startsWith("/")) { |
187 | link = link.substring(1); |
188 | } |
189 | if (link.endsWith("/")) { |
190 | link = link.substring(0, link.length() -1); |
191 | } |
192 | if (anchor != null) { |
193 | return "(" + link + ".md" + "#" +anchor + ")"; |
194 | } |
195 | |
196 | System.out.println(link); |
197 | return "(" + link + ".md)"; |
198 | }); |
199 | |
200 | if(markdown.contains("__GHOST_URL__")) { |
201 | System.out.println("Failed to replace all links in markdown in " + filename); |
202 | System.out.println(markdown); |
203 | } |
204 | |
205 | writer.println(markdown); |
206 | } |
207 | } |
208 | return 0; |
209 | } |
210 | |
211 | @JsonIgnoreProperties(ignoreUnknown = true) |
212 | record Post(String id, String slug, String title, String html, String created_at, String updated_at, String published_at, String type, int featured, String feature_image, String status, String custom_excerpt){} |
213 | |
214 | @JsonIgnoreProperties(ignoreUnknown = true) |
215 | record PostMeta(String post_id, String meta_title, String meta_description, String twitter_image, String og_image){ |
216 | |
217 | public Optional<String> title() { |
218 | return Optional.ofNullable(meta_title).or(() -> Optional.ofNullable(meta_description)); |
219 | } |
220 | |
221 | public Optional<String> image() { |
222 | return Optional.ofNullable(og_image).or(() -> Optional.ofNullable(twitter_image)); |
223 | } |
224 | } |
225 | |
226 | @JsonIgnoreProperties(ignoreUnknown = true) |
227 | record Tag(String id, String slug, String name, String description, String feature_image){} |
228 | |
229 | @JsonIgnoreProperties(ignoreUnknown = true) |
230 | record TagLink(String post_id, String tag_id){} |
231 | |
232 | @JsonIgnoreProperties(ignoreUnknown = true) |
233 | record Author(String id, String slug, String name){} |
234 | |
235 | @JsonIgnoreProperties(ignoreUnknown = true) |
236 | record AuthorLink(String post_id, String author_id){} |
237 | |
238 | public static void main(String[] args) { |
239 | final int exitCode = new CommandLine(new ghost2md()).execute(args); |
240 | System.exit(exitCode); |
241 | } |
242 | |
243 | private String downloadAsset(String uriString) { |
244 | var assetDir = new File(destinationDir, "assets"); |
245 | if (!assetDir.exists()) { |
246 | assetDir.mkdirs(); |
247 | } |
248 | |
249 | var uri = URI.create(uriString); |
250 | var file = new File(assetDir, Paths.get(uri.getPath()).getFileName().toString()); |
251 | if (file.exists()) { |
252 | return "assets/" + file.getName(); |
253 | } |
254 | |
255 | System.out.println("Downloading " + uriString); |
256 | try (var out = new FileOutputStream(file)) { |
257 | var conn = uri.toURL().openConnection(); |
258 | try (var stream = conn.getInputStream()) { |
259 | stream.transferTo(out); |
260 | } |
261 | } catch (IOException e) { |
262 | System.err.println("Failed to download file " + uriString + ". " + e.getMessage()); |
263 | return uriString; |
264 | } |
265 | |
266 | return "assets/" + file.getName(); |
267 | } |
268 | |
269 | } |