1 // Copyright (c) 2017-2020 Matthew Brennan Jones <matthew.brennan.jones@gmail.com>
2 // Boost Software License - Version 1.0
3 // Get web browser history with the D programming language
4 // https://github.com/workhorsy/d-web-browser-history
5 
6 /++
7 Get web browser history with the D programming language
8 
9 Home page:
10 $(LINK https://github.com/workhorsy/d-web-browser-history)
11 
12 Version: 2.0.0
13 
14 License:
15 Boost Software License - Version 1.0
16 
17 Examples:
18 ----
19 import web_browser_history;
20 import std.stdio : stdout;
21 import derelict.sqlite3.sqlite : DerelictSQLite3;
22 
23 DerelictSQLite3.load();
24 
25 foreach (browser ; web_browser_history.getInstalledBrowsers()) {
26 	web_browser_history.readHistory(browser, delegate(string url, int visit_count) {
27 		stdout.writefln("browser:%s, url:%s, count:%s", browser, url, visit_count);
28 	});
29 }
30 ----
31 +/
32 
33 
34 module web_browser_history;
35 
36 import derelict.sqlite3.sqlite;
37 
/++
The web browsers whose history this module knows how to locate and read.

----
enum WebBrowser : int {
	Brave,
	Chrome,
	Chromium,
	Firefox,
	Opera,
}
----
+/
enum WebBrowser : int {
	/// Brave
	Brave,
	/// Google Chrome
	Chrome,
	/// Chromium
	Chromium,
	/// Mozilla Firefox
	Firefox,
	/// Opera
	Opera,
}
59 
// Delegate that receives every reported history row. readHistory stores the
// caller's delegate here because the C row callback below has no other way to
// reach it.
private void delegate(string url, int visit_count) g_each_row_cb;

// A URL containing any of these substrings (case-sensitive) is never reported.
private immutable string[] black_list = [
	"bank", "credit union", "bond", "invest", "hospital", "medical", "private",
	"account"
];

/++
Row callback passed to sqlite3_exec.

Column 0 is read as the URL and column 1 as the visit count. A NULL URL is
reported as the text "NULL" and a NULL or missing visit count as 0. Rows whose
URL contains a black listed substring are dropped; every other row is forwarded
to g_each_row_cb.

Params:
 NotUsed = The user pointer given to sqlite3_exec. It is ignored.
 argc = The number of columns in the row.
 argv = The column values as zero terminated strings. Entries may be null.
 azColName = The column names. They are ignored.

Returns: Always 0, so sqlite3_exec keeps iterating over the result set.

Throws: std.conv.ConvException if the visit count column is not a valid int.
+/
private extern (C) int callback(void* NotUsed, int argc, char** argv, char** azColName) {
	import std.algorithm : any, canFind;
	import std.conv : to;
	import std.string : fromStringz;

	// Never index past the columns that were actually handed to us
	const(char)* url_column = (argv !is null && argc > 0) ? argv[0] : null;
	const(char)* count_column = (argv !is null && argc > 1) ? argv[1] : null;

	// Copy the URL, because the column buffer belongs to SQLite
	string url = url_column ? fromStringz(url_column).idup : "NULL";
	int total = count_column ? to!int(fromStringz(count_column)) : 0;

	bool is_black_listed = black_list.any!(entry => url.canFind(entry));

	// Skip the call when no delegate is installed instead of calling through null
	if (! is_black_listed && g_each_row_cb !is null) {
		g_each_row_cb(url, total);
	}
	return 0;
}
83 
/++
Returns the paths of all history databases found for the given browser.

In unittest builds only the "test_browser_data" directory is searched, using
one fixture file name per browser. Otherwise the browser's profile directories
are searched; directories that do not exist on the current system are skipped
by the path based overload.

Params:
 browser = The web browser to look up

Returns: The paths of the matching history files. Empty if none were found.
+/
private string[] getHistoryPaths(WebBrowser browser) {
	string file_name;
	string[] settings_paths;

	version (unittest) {
		settings_paths = ["test_browser_data"];

		final switch (browser) {
			case WebBrowser.Firefox:
				file_name = "firefox_history.sqlite";
				break;
			case WebBrowser.Chrome:
				file_name = "chrome_history.sqlite";
				break;
			case WebBrowser.Chromium:
				file_name = "chromium_history.sqlite";
				break;
			case WebBrowser.Opera:
				file_name = "opera_history.sqlite";
				break;
			case WebBrowser.Brave:
				file_name = "brave_history.sqlite";
				break;
		}
	} else {
		final switch (browser) {
			case WebBrowser.Firefox:
				file_name = "places.sqlite";
				settings_paths = ["~/.mozilla/firefox/", "%APPDATA%/Mozilla/Firefox/"];
				break;
			case WebBrowser.Chrome:
				file_name = "History";
				settings_paths = ["~/.config/google-chrome/", "%LOCALAPPDATA%/Google/Chrome/"];
				break;
			case WebBrowser.Chromium:
				file_name = "History";
				settings_paths = ["~/.config/chromium/", "%LOCALAPPDATA%/Chromium/"];
				break;
			case WebBrowser.Opera:
				file_name = "History";
				settings_paths = ["~/.config/opera/", "%APPDATA%/Opera Software/Opera Stable/"];
				break;
			case WebBrowser.Brave:
				file_name = "History";
				// The first and third entries are the legacy locations; the
				// BraveSoftware directories are used by current releases.
				settings_paths = [
					"~/.config/brave/",
					"~/.config/BraveSoftware/Brave-Browser/",
					"%APPDATA%/brave/",
					"%LOCALAPPDATA%/BraveSoftware/Brave-Browser/"
				];
				break;
		}
	}

	return getHistoryPaths(file_name, settings_paths);
}
113 
/++
Recursively searches each settings directory for files with the given name.

Params:
 file_name = The exact base name a file must have to be collected
 settings_paths = The directories to search. They may contain "~" and
  %NAME% placeholders, which are expanded with expandPath.

Returns: The paths of all matching files, in the order they were found.
+/
private string[] getHistoryPaths(string file_name, string[] settings_paths) {
	import std.file : exists, dirEntries, SpanMode, FileException;
	import std.path : baseName;

	string[] matches;

	foreach (raw_dir; settings_paths) {
		string root = expandPath(raw_dir);

		// Only directories that are present on this system are walked
		if (exists(root)) {
			try {
				foreach (string entry; dirEntries(root, SpanMode.breadth, true)) {
					if (baseName(entry) != file_name) {
						continue;
					}
					matches ~= entry;
				}
			} catch (FileException) {
				// NOTE: A file system error ends the walk of this directory;
				// whatever was collected so far is kept.
			}
		}
	}

	return matches;
}
140 
/++
Expands a leading "~" and every %NAME% placeholder in a path.

Each %NAME% is replaced with the value of the environment variable NAME.
Placeholders that do not match any environment variable are left untouched.

Params:
 path = The path to expand

Returns: The expanded path.
+/
private string expandPath(string path) {
	import std.algorithm : canFind;
	import std.array : replace;
	import std.path : expandTilde;
	import std.process : environment;

	path = expandTilde(path);

	// Without a '%' there is no placeholder, so skip copying the environment
	if (! path.canFind('%')) {
		return path;
	}

	// replace leaves the path unchanged when the placeholder is absent
	foreach (string name, string value ; environment.toAA()) {
		path = path.replace("%" ~ name ~ "%", value);
	}
	return path;
}
155 
/++
Returns an array of installed web browsers.

A browser counts as installed when at least one history file is found for it.
If no browser is found, the array holds only WebBrowser.Firefox.
+/
WebBrowser[] getInstalledBrowsers() {
	import std.traits : EnumMembers;

	WebBrowser[] detected;

	// Keep every browser that has at least one history file
	foreach (candidate ; [EnumMembers!WebBrowser]) {
		if (getHistoryPaths(candidate).length != 0) {
			detected ~= candidate;
		}
	}

	// Use firefox as the default browser, if none are installed
	return detected.length != 0 ? detected : [WebBrowser.Firefox];
}
177 
///
unittest {
	import derelict.sqlite3.sqlite : DerelictSQLite3;

	// Load the SQLite shared library through Derelict
	DerelictSQLite3.load();

	auto installed = web_browser_history.getInstalledBrowsers();

	// installed output
	// [Brave, Chrome, Chromium, Firefox, Opera]
}
187 
/++
Reads all the history for the selected web browser.

Every history file found for the browser is copied to a uniquely named scratch
file in the system's temporary directory and queried there, so the browser's
own file is never opened by SQLite. The scratch file is removed and the
database handle is closed on every exit path, including errors.

If a database cannot be opened or queried, the error is written to stderr and
the remaining history files are not read.

Params:
 browser = The web browser to search
 each_row_cb = The callback to fire for each row in the history.

Throws: std.file.FileException if a history file cannot be copied or the
 scratch file cannot be removed.
+/
void readHistory(WebBrowser browser, void delegate(string url, int visit_count) each_row_cb) {
	import std.stdio : stdout, stderr;
	import std.file : exists, remove, copy, tempDir;
	import std.path : buildPath;
	import std.string : fromStringz, toStringz;
	import std.uuid : randomUUID;

	// Install the delegate for the C row callback, and put the previous one
	// back on the way out so a nested call does not lose its delegate.
	auto previous_cb = g_each_row_cb;
	g_each_row_cb = each_row_cb;
	scope (exit) g_each_row_cb = previous_cb;

	string sql_query;
	final switch (browser) {
		case WebBrowser.Firefox:
			sql_query = "select url, visit_count from moz_places where hidden=0;";
			break;
		case WebBrowser.Chrome:
		case WebBrowser.Chromium:
		case WebBrowser.Opera:
		case WebBrowser.Brave:
			sql_query = "select url, visit_count from urls where hidden=0;";
			break;
	}

	// A unique name in the temp directory cannot clobber a file of the caller
	immutable string uri = buildPath(tempDir(), "web_browser_history_" ~ randomUUID().toString() ~ ".sqlite");

	// Never leave a copy of the user's history behind, whatever the exit path
	scope (exit) {
		if (exists(uri)) {
			remove(uri);
		}
	}

	foreach (path; getHistoryPaths(browser)) {
		stdout.writefln("path: %s", path);

		// Copy the browser's history file to the scratch location
		if (exists(uri)) {
			remove(uri);
		}
		copy(path, uri);

		// Open the database
		sqlite3* db;
		int rc = sqlite3_open(uri.toStringz, &db);

		// Runs at the end of every iteration and on early return.
		// sqlite3_close accepts the handle of a failed open as well.
		scope (exit) sqlite3_close(db);

		if (rc != SQLITE_OK) {
			stderr.writefln("Can't open database: %s\n", cast(string) fromStringz(sqlite3_errmsg(db)));
			return;
		}

		// Read the database
		char* zErrMsg;
		rc = sqlite3_exec(db, sql_query.toStringz, &callback, null, &zErrMsg);
		if (rc != SQLITE_OK) {
			stderr.writefln("SQL error: %s\n", cast(string) fromStringz(zErrMsg));
			sqlite3_free(zErrMsg);
			return;
		}
	}
}
254 
///
unittest {
	import derelict.sqlite3.sqlite : DerelictSQLite3;

	// Load the SQLite shared library through Derelict
	DerelictSQLite3.load();

	// Maps each url to its visit count
	int[string] visits;
	web_browser_history.readHistory(WebBrowser.Chrome, (string url, int visit_count) {
		visits[url] = visit_count;
	});

	// visits output
	// ["https://dlang.org/":3, "https://www.google.com/":7, "https://www.reddit.com/":1]
}
268 
/++
Reads all the history for all the web browsers.

Calls readHistory once for every member of WebBrowser, in declaration order.

Params:
 each_row_cb = The callback to fire for each row in the history.
+/
void readHistoryAll(void delegate(string url, int visit_count) each_row_cb) {
	import std.traits : EnumMembers;

	WebBrowser[] every_browser = [EnumMembers!WebBrowser];

	foreach (current ; every_browser) {
		readHistory(current, each_row_cb);
	}
}
282 
///
unittest {
	import derelict.sqlite3.sqlite : DerelictSQLite3;

	// Load the SQLite shared library through Derelict
	DerelictSQLite3.load();

	// Maps each url to its visit count
	int[string] visits;
	web_browser_history.readHistoryAll((string url, int visit_count) {
		visits[url] = visit_count;
	});

	// visits output
	// ["https://dlang.org/":3, "https://www.google.com/":7, "https://www.reddit.com/":1, "https://slashdot.org/":7]
}