1 /++
2     Lightshot `prnt.sc` (and `prntscr.com`) gallery downloader.
3 
4     See_Also: https://app.prntscr.com/en/index.html
5  +/
6 module prntscget.app;
7 
8 private:
9 
10 import std.array : Appender;
11 import std.getopt : GetoptResult;
12 import std.json : JSONValue;
13 import core.time : Duration;
14 
15 public:
16 
17 
18 // RemoteImage
/++
    Describes a single remote image that is queued for download.
 +/
struct RemoteImage
{
    /// Address (HTTP URL) the image is fetched from.
    string url;

    /// Path on the local filesystem the image is written to.
    string localPath;

    /// Position of the image in the list JSON.
    size_t number;

    /++
        Creates a [RemoteImage] from its three constituent values.

        Params:
            url = HTTP URL of the image.
            localPath = Local path to save the remote image to.
            number = Image index (number in list JSON).
     +/
    this(const string url, const string localPath, const size_t number) pure @safe @nogc nothrow
    {
        this.number = number;
        this.localPath = localPath;
        this.url = url;
    }
}
41 
42 
43 // Configuration
/++
    Settings for one program run, as gathered from the command line.

    Every member carries the default used when its option is not supplied.
 +/
struct Configuration
{
    /// Path of the file the JSON list of images is saved to (and read from).
    string listFile = "target.json";

    /++
        Number of download attempts made for a single file before giving up
        on it and moving on to the next one.
     +/
    uint retriesPerFile = 20;

    /// Directory that downloaded images are saved into.
    string targetDirectory = "images";

    /// Timeout, in seconds, applied to requests when downloading.
    uint requestTimeoutSeconds = 60;

    /// Pause, in seconds, inserted between image downloads.
    double delayBetweenImagesSeconds = 1.0;

    /// Number of leading list entries to leave out of consideration entirely.
    uint imageOffset = 0;

    /// Number of enumerated images to skip when downloading (the starting index).
    uint imagesToSkip = 0;

    /// Upper bound on how many images to download, ignoring duplicates.
    uint numToDownload = uint.max;

    /// The `__auth` cookie string given at the command line.
    string cookie;

    /// If set, downloaded files are kept even when they are not valid images.
    bool alwaysKeep = false;

    /// If set, nothing is downloaded; this is a dry run.
    bool dryRun = false;
}
85 
86 
87 // ShellReturn
/++
    Exit codes the program hands back to the invoking shell.
 +/
enum ShellReturn : int
{
    /// Success.
    success = 0,

    /// An unhandled exception was thrown.
    exception = 1,

    /// The JSON list of images could not be fetched from server.
    failedToFetchList = 2,

    /// The JSON list file could not be found.
    imageJSONNotFound = 3,

    /// Target directory is a file or other non-directory.
    targetDirNotADir = 4,
}
99 
100 
101 // MagicNumber
/++
    Sentinel values used alongside HTTP status codes to signal outcomes
    that are not HTTP statuses.
 +/
enum MagicNumber : int
{
    /// Saved file was not a JPEG nor a PNG.
    fileIsNotAnImage = -1,

    /// A download was skipped because of a dry run.
    dryRunSkip = -2,
}
110 
111 
112 // main
/++
    Program entry point.

    Merely passes execution to [run], wrapped in a try-catch. Should an
    [object.Exception] escape [run], its message is reported on the standard
    error stream (so that it does not mix with regular program output) and
    [ShellReturn.exception] is returned.

    Params:
        args = Arguments passed at the command line.

    Returns:
        zero on success, non-zero on errors.
 +/
int main(string[] args)
{
    try
    {
        return run(args);
    }
    catch (Exception e)
    {
        import std.stdio : stderr;

        // Diagnostics belong on stderr, not in the regular output stream
        stderr.writeln("exception thrown: ", e.msg);
        return ShellReturn.exception;
    }

    // Both branches above return; this only guards against falling off the end.
    assert(0);
}
139 
140 
141 // run
/++
    Program main logic.

    Parses the command-line arguments, obtains the JSON list of images (from
    the server if a cookie was supplied, otherwise from the list file on disk),
    enumerates which images still need to be fetched and finally downloads them.

    Params:
        args = Arguments passed at the command line.

    Returns:
        zero on success, non-zero on errors.
 +/
int run(string[] args)
{
    import std.algorithm.comparison : min;
    import std.datetime.systime : Clock;
    import std.file : exists;
    import std.json : parseJSON;
    import std.range : drop, take;
    import std.stdio : writefln, writeln;
    import core.time : msecs;

    Configuration config;

    auto results = handleGetopt(args, config);

    if (results.helpWanted)
    {
        printHelp(results, args);
        return ShellReturn.success;
    }

    /// JSON image list, fetched from the server
    JSONValue listJSON;

    /// HTTP GET request headers to use when downloading
    immutable headers = buildHeaders();

    if (config.cookie.length)
    {
        import std.algorithm.searching : canFind, startsWith;
        import std.stdio : File;

        // The base headers carry no authentication, so the cookie supplied at
        // the command line has to be attached here or the list request is
        // anonymous. It is added to a separate copy used only for the list
        // request, so that it is not also sent along with every image download.
        string[string] listHeaders = buildHeaders();
        immutable authCookie = config.cookie.startsWith("__auth=") ?
            config.cookie :
            ("__auth=" ~ config.cookie);
        listHeaders["cookie"] ~= "; " ~ authCookie;

        writefln(`fetching image list and saving into "%s"...`, config.listFile);
        const listFileContents = getImageList(listHeaders, config.requestTimeoutSeconds);

        if (!listFileContents.canFind(`"result":{"success":true,`))
        {
            writeln("failed to fetch image list. incorrect cookie?");
            return ShellReturn.failedToFetchList;
        }

        listJSON = parseJSON(listFileContents);
        immutable total = listJSON["result"]["total"].integer;
        writefln("%d %s found.", total, total.plurality("image", "images"));
        if (!config.dryRun) File(config.listFile, "w").writeln(listJSON.toPrettyString);
    }
    else if (!config.listFile.exists)
    {
        writefln(`image list file "%s" does not exist.`, config.listFile);

        if (config.listFile == Configuration.init.listFile)
        {
            // No list file was probably specified
            writeln("see the README on how to provide a cookie with `-c` to download your gallery list of images.");
        }

        return ShellReturn.imageJSONNotFound;
    }

    if (!ensureImageDirectory(config.targetDirectory))
    {
        writefln(`"%s" is not a directory.`, config.targetDirectory);
        return ShellReturn.targetDirNotADir;
    }

    // The JSON type enum was renamed in compiler release 2.087
    static if (__VERSION__ >= 2087)
    {
        import std.json : JSONType;
        alias jsonNullType = JSONType.null_;
    }
    else
    {
        import std.json : JSON_TYPE;
        alias jsonNullType = JSON_TYPE.NULL;
    }

    if (listJSON.type == jsonNullType)
    {
        import std.file : readText;

        // A cookie was not supplied and the list JSON was never read
        listJSON = config.listFile
            .readText
            .parseJSON;
    }

    immutable numImages = cast(size_t)listJSON["result"]["total"].integer;

    if (!numImages)
    {
        writeln("no images to fetch.");
        return ShellReturn.success;
    }

    Appender!(RemoteImage[]) images;
    immutable numImagesToDownload = min(numImages, config.numToDownload);
    images.reserve(numImagesToDownload);
    immutable numExistingImages = enumerateImages(images, listJSON, config);

    if (!images.data.length)
    {
        writefln("\nno images to fetch -- all %d are already downloaded.", numImages);
        return ShellReturn.success;
    }
    else if (numExistingImages > 0)
    {
        writefln(" (skipping %d %s already in directory)", numExistingImages,
            numExistingImages.plurality("image", "images"));
    }

    const imageSelection = images.data
        .drop(config.imagesToSkip)
        .take(numImagesToDownload);

    if (!imageSelection.length)
    {
        // Everything that was enumerated got skipped; without this guard the
        // ETA computation below would end up negative.
        writefln("no images to fetch -- skipping %d leaves none of the %d enumerated.",
            config.imagesToSkip, images.data.length);
        return ShellReturn.success;
    }

    // Report on what will actually be downloaded, i.e. the selection after
    // skipping, not on everything that was enumerated. There is no delay
    // before the first image, hence one delay fewer than there are images.
    immutable delayBetweenImages = (cast(int)(1000 * config.delayBetweenImagesSeconds)).msecs;
    immutable eta = (imageSelection.length - 1) * delayBetweenImages;

    writeln("image list file: ", config.listFile);
    writefln("delay between images: %.1f seconds", config.delayBetweenImagesSeconds);
    writeln("saving to directory: ", config.targetDirectory);
    writefln("total images to download: %s -- this will take a MINIMUM of %s.",
        imageSelection.length, eta);

    auto before = Clock.currTime;
    downloadAllImages(imageSelection, config, headers);
    auto after = Clock.currTime;

    // Drop sub-second precision so the printed duration stays readable
    before.fracSecs = 0.msecs;
    after.fracSecs = 0.msecs;

    writeln("done. actual total time elapsed: ", (after-before));
    return ShellReturn.success;
}
282 
283 
284 // handleGetopt
/++
    Parses the command-line arguments with [std.getopt.getopt], storing the
    values of recognised options in the passed [Configuration].

    Option matching is case-sensitive, which is what lets `-d` and `-D` coexist.

    Params:
        args = Command-line arguments passed to the program.
        config = out [Configuration] to set the members of.

    Returns:
        [std.getopt.GetoptResult] as returned by the call to [std.getopt.getopt].
 +/
auto handleGetopt(ref string[] args, out Configuration config) /*@safe*/
{
    import std.getopt : getopt, getoptConfig = config;

    auto results = getopt(args,
        getoptConfig.caseSensitive,

        "c|cookie", "Cookie to download gallery of (see README).", &config.cookie,

        "f|file", "Filename to save the JSON list of images to.", &config.listFile,

        "d|dir", "Target image directory.", &config.targetDirectory,

        "o|offset",
        "Images to skip considering, before checking for existing images.",
        &config.imageOffset,

        "s|skip",
        "Images to effectively skip downloading, after applying offset and checking for existing files.",
        &config.imagesToSkip,

        "n|num", "Number of images to download.", &config.numToDownload,

        "r|retries", "How many times to retry downloading an image.", &config.retriesPerFile,

        "D|delay", "Delay between image downloads, in seconds.", &config.delayBetweenImagesSeconds,

        "t|timeout", "Download attempt read timeout, in seconds.", &config.requestTimeoutSeconds,

        "always-keep",
        "Whether or not to always keep downloaded files, even if they're not valid images.",
        &config.alwaysKeep,

        "dry-run", "Download nothing, only echo what would be done.", &config.dryRun,
    );

    return results;
}
336 
337 
338 // printHelp
/++
    Prints the `getopt` help screen to the terminal.

    The screen is headed by a banner with the program version and build
    timestamp, followed by a usage line and the list of options.

    Params:
        results = The results as returned from the `getopt` call.
        args = The shell arguments passed to the program.
 +/
void printHelp(GetoptResult results, const string[] args)
{
    import prntscget.semver : PrntscgetSemVer, PrntscgetSemVerPrerelease;
    import std.format : format;
    import std.getopt : defaultGetoptPrinter;
    import std.path : baseName;

    // The banner only consists of compile-time values, so build it at compile time
    enum bannerPattern = "prntscget v%d.%d.%d%s, built on %s";
    enum banner = format(bannerPattern,
        PrntscgetSemVer.majorVersion,
        PrntscgetSemVer.minorVersion,
        PrntscgetSemVer.patchVersion,
        PrntscgetSemVerPrerelease,
        __TIMESTAMP__);

    immutable programName = args[0].baseName;
    immutable usageLine = format("%s\n\nusage: %s [options]\n", banner, programName);

    defaultGetoptPrinter(usageLine, results.options);
}
364 
365 
366 // enumerateImages
/++
    Enumerates the images in the list JSON, skipping those that already exist
    on disk and end like a complete JPEG or PNG file.

    The list is walked in reverse order, after dropping `config.imageOffset`
    entries. Entries whose local file is missing, or exists but does not have a
    valid image ending, are appended to `images`. Enumeration stops as soon as
    `config.numToDownload` entries have been collected.

    Params:
        images = [std.array.Appender] to append references to all images to download to.
        listJSON = JSON list of images to download.
        config = The current [Configuration] of all getopt values aggregated.

    Returns:
        The number of images that were skipped because a valid copy of them
        already exists in the target directory.
 +/
uint enumerateImages(ref Appender!(RemoteImage[]) images,
    const JSONValue listJSON,
    const Configuration config)
{
    import std.range : drop, enumerate, retro;

    uint numExistingImages;
    bool outputPreamble;

    auto range = listJSON["result"]["screens"]
        .array
        .retro
        .drop(config.imageOffset)
        .enumerate;

    foreach (immutable i, imageJSON; range)
    {
        import std.array : replace, replaceFirst;
        import std.file : exists;
        import std.path : buildPath, extension;

        // Break early to cover the case of numToDownload == 0
        if (images.data.length == config.numToDownload) break;

        immutable url = imageJSON["url"].str;

        // Derive the filename from the date; the first two colons are replaced by 'h' and 'm'
        immutable filename = imageJSON["date"].str
            .replace(" ", "_")
            .replaceFirst(":", "h")
            .replaceFirst(":", "m") ~ url.extension;
        immutable localPath = buildPath(config.targetDirectory, filename);

        if (localPath.exists)
        {
            import std.file : getSize;
            import std.stdio : File, stdout, write;

            enum size_t maxImageEndingMarkerLength = 12;  // JPEG 2, PNG 12

            // The size is unsigned, so subtracting the marker length from a
            // file shorter than that would wrap around to an enormous offset.
            // Read such files from their beginning instead.
            immutable localPathSize = getSize(localPath);
            immutable seekPos = (localPathSize > maxImageEndingMarkerLength) ?
                (localPathSize - maxImageEndingMarkerLength) :
                0;
            auto existingFile = File(localPath, "rb");
            ubyte[maxImageEndingMarkerLength] buf;

            if (!outputPreamble)
            {
                write("verifying existing images ");
                outputPreamble = true;
            }

            scope(exit) stdout.flush();

            existingFile.seek(seekPos);
            const existingFileEnding = existingFile.rawRead(buf[]);

            if (hasValidJPEGEnding(existingFileEnding) || hasValidPNGEnding(existingFileEnding))
            {
                write('.');
                ++numExistingImages;
                // continue without appending the image entry
                continue;
            }
            else
            {
                // drop down to append the image entry and re-download the file
                write('!');
            }
        }

        // `i` counts from after the offset; add it back to get the list position
        images ~= RemoteImage(url, localPath, (i + config.imageOffset));
    }

    return numExistingImages;
}
452 
453 
454 // downloadAllImages
/++
    Downloads every image in the passed `images` list, one after the other.

    Each image is saved to the path in its [RemoteImage.localPath]. A failed
    attempt is retried up to `config.retriesPerFile` times, after which the
    image is given up on and the next one is tried. Except before the very
    first attempt, and never in a dry run, each attempt is preceded by a pause
    of `config.delayBetweenImagesSeconds` seconds.

    Params:
        images = The list of images to download.
        config = The current program [Configuration].
        headers = HTTP GET headers to supply when downloading.
 +/
void downloadAllImages(const RemoteImage[] images,
    const Configuration config,
    const string[string] headers)
{
    import std.array : Appender;
    import std.net.curl : CurlException, CurlTimeoutException; //, HTTPStatusException;
    import std.stdio : stdout, write, writef, writeln;
    import core.thread : Thread;
    import core.time : msecs, seconds;

    enum initialBufferSize = 4 * 1_048_576;

    immutable pause = (cast(int)(1000 * config.delayBetweenImagesSeconds)).msecs;
    immutable timeout = config.requestTimeoutSeconds.seconds;

    // One buffer is reused for all downloads
    Appender!(ubyte[]) sink;
    sink.reserve(initialBufferSize);

    imageloop:
    foreach (immutable index, const entry; images)
    {
        // Terminate the progress line of this image however the loop body is left
        scope(exit) writeln();

        retryloop:
        foreach (immutable attempt; 0..config.retriesPerFile)
        {
            try
            {
                immutable veryFirstAttempt = (index == 0) && (attempt == 0);

                if (!config.dryRun && !veryFirstAttempt)
                {
                    Thread.sleep(pause);
                }

                if (attempt == 0)
                {
                    writef("[%4d] %s --> %s: ", entry.number, entry.url, entry.localPath);
                    stdout.flush();
                }

                immutable int status = config.dryRun ?
                    MagicNumber.dryRunSkip :
                    downloadImage(sink, entry.url, entry.localPath, timeout, headers, config.alwaysKeep);

                if (status == 200)
                {
                    // HTTP OK; the scopeguard ends the line
                    write("ok");
                    continue imageloop;
                }
                else if (status == MagicNumber.dryRunSkip)
                {
                    // Dry run; the scopeguard ends the line
                    write("skip");
                    continue imageloop;
                }
                else if (status == MagicNumber.fileIsNotAnImage)
                {
                    // A non-image file was received. If it was kept regardless,
                    // move on to the next image; otherwise try this one again.
                    write('.');
                    stdout.flush();
                    if (config.alwaysKeep) continue imageloop;
                }
                else if (status == 403)
                {
                    // HTTP Forbidden. Throttled?
                    write(" !", status, '!');
                    stdout.flush();
                }
                else
                {
                    write(" ?", status, '?');
                    stdout.flush();
                }

                // Falling out of the chain means this image is to be retried
            }
            catch (CurlTimeoutException e)
            {
                // Timed out; retry
                write(" (", e.msg, ')');
                stdout.flush();
            }
            catch (CurlException e)
            {
                // Unexpected network error; retry
                write(" (", e.msg, ')');
                stdout.flush();
            }
            catch (Exception e)
            {
                writeln();
                writeln(e);
                writeln("FIXME: add a catch for this type of exception");
            }
        }
    }
}
584 
585 
586 // buildHeaders
/++
    Assembles the HTTP request headers that are sent along with requests to
    the server.

    Returns:
        A `string[string]` associative array mapping header names to values.
 +/
string[string] buildHeaders() pure @safe nothrow
{
    string[string] headerTable =
    [
        "user-agent" :
            "Mozilla/5.0 (X11; Linux x86_64; rv:93.0) Gecko/20100101 Firefox/93.0",
        "accept" :
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/png,image/jpeg,*/*;q=0.8",
        "accept-language" : "en-US,en;q=0.5",
        "accept-encoding" : "gzip, deflate, br",
        "dnt" : "1",
        "cookie" : "G_ENABLED_IDPS=google; G_AUTHUSER_H=0",
        "upgrade-insecure-requests" : "1",
        "sec-fetch-dest" : "document",
        "sec-fetch-mode" : "navigate",
        "sec-fetch-site" : "none",
        "sec-fetch-user" : "?1",
        "sec-GPC" : "1",
    ];

    return headerTable;
}
612 
613 
614 // downloadImage
/++
    Fetches a single image over HTTP and, if appropriate, writes it to disk.

    The response body is gathered in `buffer`, which is cleared again before
    the function returns. On an HTTP 200 response the body is written to
    `imagePath` if it ends like a PNG or JPEG file, or unconditionally if
    `alwaysKeep` is set.

    Params:
        buffer = Appender to save the downloaded image to.
        url = HTTP URL to fetch.
        imagePath = Filename to save the downloaded image to.
        requestTimeout = Timeout to use when downloading.
        headers = HTTP GET headers to supply when downloading.
        alwaysKeep = Whether or not to always keep downloaded files,
            even if they're not valid images.

    Returns:
        The HTTP code encountered when attempting to download the image.
        The magic number [MagicNumber.fileIsNotAnImage] is returned if the
        downloaded file was detected as not an image.
 +/
int downloadImage(ref Appender!(ubyte[]) buffer,
    const string url,
    const string imagePath,
    const Duration requestTimeout,
    const string[string] headers,
    const bool alwaysKeep)
{
    import std.net.curl : HTTP;
    import std.stdio : File;

    auto request = HTTP(url);
    request.dnsTimeout = requestTimeout;
    request.connectTimeout = requestTimeout;
    request.dataTimeout = requestTimeout;

    // Start from a clean slate so only the passed headers are sent
    request.clearRequestHeaders();

    foreach (immutable name, immutable content; headers)
    {
        request.addRequestHeader(name, content);
    }

    // The buffer is shared between calls; always leave it empty
    scope(exit) buffer.clear();

    request.onReceive = (ubyte[] chunk)
    {
        buffer.put(chunk);
        return chunk.length;
    };

    request.perform();

    immutable int status = request.statusLine.code;

    if (status != 200)
    {
        // Nothing is written to disk for anything but HTTP OK
        return status;
    }

    immutable looksLikeImage =
        hasValidPNGEnding(buffer.data) ||
        hasValidJPEGEnding(buffer.data);

    if (looksLikeImage || alwaysKeep)
    {
        File(imagePath, "w").rawWrite(buffer.data);
    }

    if (looksLikeImage)
    {
        return status;
    }

    // Interrupted download? Cloudflare error page?
    return MagicNumber.fileIsNotAnImage;
}
681 
682 
683 // hasValidJPEGEnding
/++
    Tells whether the passed bytes end with the two-byte sequence `FF D9`,
    with which a JPEG file is expected to end.

    Params:
        fileContents = Contents of a (possibly) JPEG file.

    Returns:
        `true` if `fileContents` ends with the sequence; `false` if it does not,
        which includes input that is shorter than the sequence.
 +/
bool hasValidJPEGEnding(const ubyte[] fileContents) pure @safe @nogc nothrow
{
    static immutable ubyte[2] endOfImage = [ 0xFF, 0xD9 ];

    if (fileContents.length < endOfImage.length)
    {
        // Too short to contain the marker
        return false;
    }

    const tail = fileContents[$ - endOfImage.length .. $];
    return tail == endOfImage[];
}
697 
698 
699 // hasValidPNGEnding
/++
    Tells whether the passed bytes end with the twelve-byte sequence
    `00 00 00 00 49 45 4E 44 AE 42 60 82`, with which a PNG file is expected
    to end.

    Params:
        fileContents = Contents of a (possibly) PNG file.

    Returns:
        `true` if `fileContents` ends with the sequence; `false` if it does not,
        which includes input that is shorter than the sequence.
 +/
bool hasValidPNGEnding(const ubyte[] fileContents) pure @safe @nogc nothrow
{
    static immutable ubyte[12] endChunk =
    [
        0x00, 0x00, 0x00, 0x00,
        0x49, 0x45, 0x4e, 0x44,
        0xae, 0x42, 0x60, 0x82,
    ];

    if (fileContents.length < endChunk.length)
    {
        // Too short to contain the whole sequence
        return false;
    }

    const tail = fileContents[$ - endChunk.length .. $];
    return tail == endChunk[];
}
713 
714 
715 // ensureImageDirectory
/++
    Ensures the target image directory exists, creating it if it does not and
    returning false if it fails to.

    Missing parent directories are created as well, so a nested target such as
    `a/b/c` works even if `a` does not exist yet.

    Params:
        targetDirectory = Target directory to ensure existence of.

    Returns:
        `true` if the directory already exists or if it was successfully created;
        `false` if the path exists but is not a directory, or if the directory
        could not be created.
 +/
bool ensureImageDirectory(const string targetDirectory) @safe
{
    import std.file : FileException, exists, isDir, mkdirRecurse;

    if (targetDirectory.exists)
    {
        // Something is already there; it is only usable if it is a directory
        return targetDirectory.isDir;
    }

    try
    {
        mkdirRecurse(targetDirectory);
        return true;
    }
    catch (FileException)
    {
        // Creation failed; report it by return value as documented,
        // instead of letting the exception escape.
        return false;
    }
}
743 
744 
745 // getImageList
/++
    Requests the JSON list of images from the `prnt.sc` (`prntscr.com`) API
    server, by way of a POST request.

    Params:
        headers = HTTP headers to supply when fetching the list.
        requestTimeoutSeconds = Request timeout when downloading the list.

    Returns:
        The response body of the request if the server answered with
        HTTP 200; otherwise an empty string.
 +/
string getImageList(const string[string] headers, const uint requestTimeoutSeconds)
{
    import std.array : Appender;
    import std.net.curl : HTTP;
    import core.time : seconds;

    enum apiURL = "https://api.prntscr.com/v1/";
    enum requestBody = `{"jsonrpc":"2.0","method":"get_user_screens","id":1,"params":{"count":10000}}`;
    enum contentType = "application/x-www-form-urlencoded";
    enum initialBufferSize = 2 * 1_048_576;

    immutable timeout = requestTimeoutSeconds.seconds;

    auto request = HTTP(apiURL);
    request.dnsTimeout = timeout;
    request.connectTimeout = timeout;
    request.dataTimeout = timeout;
    request.clearRequestHeaders();
    request.setPostData(requestBody, contentType);

    foreach (immutable name, immutable content; headers)
    {
        request.addRequestHeader(name, content);
    }

    // Collects the response body as it arrives in chunks
    Appender!(ubyte[]) received;
    received.reserve(initialBufferSize);

    request.onReceive = (ubyte[] chunk)
    {
        received.put(chunk);
        return chunk.length;
    };

    request.perform();

    if (request.statusLine.code != 200)
    {
        return string.init;
    }

    return cast(string)received.data;
}
792 
793 
794 // plurality
/++
    Chooses between two values based on if the passed numeric value is one or many.

    Both `1` and `-1` count as "one". The check against `-1` is only made for
    types that can actually hold negative values; for an unsigned type the
    comparison would instead match the largest value of that type.

    Params:
        num = Number of items.
        singular = Singular value.
        plural = Plural value.

    Returns:
        Either the singular or the plural form, based on the value of `num`.
 +/
T plurality(T, N)(N num, T singular, T plural) pure @safe @nogc nothrow
{
    static if (__traits(isUnsigned, N))
    {
        // Comparing with -1 here would convert it to N.max
        return (num == 1) ? singular : plural;
    }
    else
    {
        return ((num == 1) || (num == -1)) ? singular : plural;
    }
}