1 // Copyright (c) 2017 Matthew Brennan Jones <matthew.brennan.jones@gmail.com>
2 // Boost Software License - Version 1.0
3 // Get web browser history with the D programming language
4 // https://github.com/workhorsy/d-web-browser-history
5 
6 /++
7 Get web browser history with the D programming language
8 
9 Home page:
10 $(LINK https://github.com/workhorsy/d-web-browser-history)
11 
12 Version: 1.2.0
13 
14 License:
15 Boost Software License - Version 1.0
16 
17 Examples:
18 ----
19 import web_browser_history;
20 import std.stdio : stdout;
21 
22 foreach (browser ; web_browser_history.getInstalledBrowsers()) {
23 	web_browser_history.readHistory(browser, delegate(string url, int visit_count) {
24 		stdout.writefln("browser:%s, url:%s, count:%s", browser, url, visit_count);
25 	});
26 }
27 ----
28 +/
29 
30 
31 module web_browser_history;
32 
33 import derelict.sqlite3.sqlite;
34 
35 /++
36 The supported web browsers.
37 
38 ----
39 enum WebBrowser {
40 	Brave,
41 	Chrome,
42 	Chromium,
43 	Firefox,
44 	Opera,
45 }
46 ----
47 +/
48 
enum WebBrowser {
	Brave,    /// Brave
	Chrome,   /// Google Chrome
	Chromium, /// Chromium
	Firefox,  /// Mozilla Firefox
	Opera,    /// Opera
}
56 
// Delegate invoked by the SQLite row callback for every history row that
// passes the black list. It is set by readHistory before a query is run.
private void delegate(string url, int visit_count) g_each_row_cb;

// Substrings that mark a URL as sensitive. Any URL containing one of these
// (case-sensitive match) is never passed on to the user's delegate.
private string[] black_list = [
	"bank",
	"credit union",
	"bond",
	"invest",
	"hospital",
	"medical",
	"private",
	"account",
];
63 
/++
Row callback handed to sqlite3_exec by readHistory.

Reads the two selected columns (url, visit_count) from the row, drops rows
whose URL contains a black-listed term, and forwards every other row to
the delegate stored in g_each_row_cb.

Params:
 NotUsed = The user pointer given to sqlite3_exec. Not used.
 argc = The number of columns in the row.
 argv = The column values as C strings. Individual entries may be null.
 azColName = The column names. Not used.

Returns: Always 0, so that the query keeps running.
+/
private extern (C) int callback(void* NotUsed, int argc, char** argv, char** azColName) {
	import std.conv : to, ConvException;
	import std.algorithm : any, canFind;
	import std.string : fromStringz;

	// Both columns are required. Never index past what SQLite handed over.
	if (argc < 2 || argv is null) {
		return 0;
	}

	// Take an owned copy of the URL, because it is handed on to user code
	string url = argv[0] ? fromStringz(argv[0]).idup : "NULL";

	// A missing or non-numeric count is treated as 0. An exception must not
	// escape from here, because the caller is C code inside SQLite.
	int total = 0;
	if (argv[1]) {
		try {
			total = to!int(fromStringz(argv[1]));
		} catch (ConvException) {
			total = 0;
		}
	}

	bool is_black_listed = black_list.any!(entry => url.canFind(entry));

	// Nothing to report to if no delegate has been registered
	if (! is_black_listed && g_each_row_cb !is null) {
		g_each_row_cb(url, total);
	}
	return 0;
}
80 
/++
Finds the history database files for the given browser.

Picks the history file name and the directories to search for that browser,
then hands both to the (file_name, settings_paths) overload. Unittest builds
search the local "test_browser_data" directory instead of the real profile
directories.
+/
private string[] getHistoryPaths(WebBrowser browser) {
	string history_file;
	string[] search_dirs;

	version (unittest) {
		// Every browser's fixture lives in the same test directory
		search_dirs = ["test_browser_data"];

		final switch (browser) {
			case WebBrowser.Brave:
				history_file = "brave_history.sqlite";
				break;
			case WebBrowser.Chrome:
				history_file = "chrome_history.sqlite";
				break;
			case WebBrowser.Chromium:
				history_file = "chromium_history.sqlite";
				break;
			case WebBrowser.Firefox:
				history_file = "firefox_history.sqlite";
				break;
			case WebBrowser.Opera:
				history_file = "opera_history.sqlite";
				break;
		}
	} else {
		final switch (browser) {
			case WebBrowser.Brave:
				history_file = "History";
				search_dirs = ["~/.config/brave/", "%APPDATA%/brave/"];
				break;
			case WebBrowser.Chrome:
				history_file = "History";
				search_dirs = ["~/.config/google-chrome/", "%LOCALAPPDATA%/Google/Chrome/"];
				break;
			case WebBrowser.Chromium:
				history_file = "History";
				search_dirs = ["~/.config/chromium/"];
				break;
			case WebBrowser.Firefox:
				history_file = "places.sqlite";
				search_dirs = ["~/.mozilla/firefox/", "%APPDATA%/Mozilla/Firefox/"];
				break;
			case WebBrowser.Opera:
				history_file = "History";
				search_dirs = ["~/.config/opera/", "%APPDATA%/Opera Software/Opera Stable/"];
				break;
		}
	}

	return getHistoryPaths(history_file, search_dirs);
}
110 
/++
Searches each settings directory recursively for files named file_name.

Params:
 file_name = The exact base name of the history file to look for.
 settings_paths = The directories to search. Each one is passed through
 expandPath first. Directories that do not exist are skipped.

Returns: The path of every matching file that was found. May be empty.
+/
private string[] getHistoryPaths(string file_name, string[] settings_paths) {
	// NOTE: SpanMode is declared in std.file, not in std.path
	import std.file : exists, dirEntries, FileException, SpanMode;
	import std.path : baseName;

	string[] paths;
	foreach (settings_path; settings_paths) {
		string full_path = expandPath(settings_path);

		if (! exists(full_path)) {
			continue;
		}

		try {
			// Walk the whole tree (following symlinks) and keep the name matches
			foreach (string path; dirEntries(full_path, SpanMode.breadth, true)) {
				if (baseName(path) == file_name) {
					paths ~= path;
				}
			}
		} catch (FileException) {
			// NOTE: Best effort. An unreadable directory ends the walk of this
			// settings path, but anything already found is kept.
		}
	}

	return paths;
}
137 
/++
Expands a leading "~" and any "%NAME%" environment variable references
in the path, and returns the result.
+/
private string expandPath(string path) {
	import std.process : environment;
	import std.path : expandTilde;
	import std.algorithm : count;
	import std.string : replace;

	string expanded = expandTilde(path);

	foreach (string name, string value ; environment.toAA()) {
		// Cheap pre-check: skip variables whose name is nowhere in the path
		if (expanded.count(name) == 0) {
			continue;
		}

		string token = "%" ~ name ~ "%";
		expanded = expanded.replace(token, value);
	}

	return expanded;
}
152 
/++
Returns an array of installed web browsers.

A browser counts as installed when at least one history file is found for
it. If no browser is found, the array holds WebBrowser.Firefox only.
+/
WebBrowser[] getInstalledBrowsers() {
	import std.traits : EnumMembers;

	// Keep each browser that has a history file, in enum order
	WebBrowser[] found;
	foreach (candidate ; [EnumMembers!WebBrowser]) {
		if (getHistoryPaths(candidate).length == 0) {
			continue;
		}
		found ~= candidate;
	}

	// Firefox is the default when nothing was detected
	return found.length > 0 ? found : [WebBrowser.Firefox];
}
174 
175 ///
unittest {
	import derelict.sqlite3.sqlite : DerelictSQLite3;
	DerelictSQLite3.load();
	WebBrowser[] browsers = web_browser_history.getInstalledBrowsers();

	// The result is never empty: Firefox is returned when nothing is found
	assert(browsers.length > 0);

	// browsers output
	// [Brave, Chrome, Chromium, Firefox, Opera]
}
184 
/++
Reads all the history for the selected web browser.

Each history file found for the browser is copied to "History.sqlite" in the
current working directory, queried, and the copy is removed again before
returning. Every file path that is read is printed to stdout. If a database
can not be opened or queried, the error is printed to stderr and the
remaining history files are skipped.

Params:
 browser = The web browser to search
 each_row_cb = The callback to fire for each row in the history.
+/
void readHistory(WebBrowser browser, void delegate(string url, int visit_count) each_row_cb) {
	import std.stdio : stdout, stderr;
	import std.file : exists, remove, copy;
	import std.string : fromStringz, toStringz;

	// Register the delegate for the SQLite row callback, and put the previous
	// one back on every exit path so no stale delegate is left behind.
	auto previous_cb = g_each_row_cb;
	g_each_row_cb = each_row_cb;
	scope (exit) g_each_row_cb = previous_cb;

	string[] paths = getHistoryPaths(browser);
	string sql_query;
	final switch (browser) {
		case WebBrowser.Firefox:
			sql_query = "select url, visit_count from moz_places where hidden=0;";
			break;
		case WebBrowser.Chrome:
		case WebBrowser.Chromium:
		case WebBrowser.Opera:
		case WebBrowser.Brave:
			sql_query = "select url, visit_count from urls where hidden=0;";
			break;
	}

	immutable string uri = "History.sqlite";

	// Remove the temporary copy on every exit path, including the error returns
	scope (exit) {
		if (exists(uri)) {
			remove(uri);
		}
	}

	foreach (path; paths) {
		stdout.writefln("path: %s", path);

		// Copy the browser's history file to the local directory
		if (exists(uri)) {
			remove(uri);
		}
		copy(path, uri);

		// Open the database
		sqlite3* db;
		int rc = sqlite3_open(uri.toStringz, &db);

		// Close the handle when this iteration ends, however it ends.
		// Closing a null handle is a harmless no-op.
		scope (exit) sqlite3_close(db);

		if (rc != SQLITE_OK) {
			stderr.writefln("Can't open database: %s\n", cast(string) fromStringz(sqlite3_errmsg(db)));
			return;
		}

		// Read the database
		char* zErrMsg;
		rc = sqlite3_exec(db, sql_query.toStringz, &callback, null, &zErrMsg);
		if (rc != SQLITE_OK) {
			stderr.writefln("SQL error: %s\n", cast(string) fromStringz(zErrMsg));
			sqlite3_free(zErrMsg);
			return;
		}
	}
}
251 
252 ///
unittest {
	import derelict.sqlite3.sqlite : DerelictSQLite3;
	DerelictSQLite3.load();

	// Remember the visit count of every URL that is reported
	int[string] data;
	void delegate(string, int) on_row = (string url, int visit_count) {
		data[url] = visit_count;
	};
	web_browser_history.readHistory(WebBrowser.Chrome, on_row);

	// data output
	// ["https://dlang.org/":3, "https://www.google.com/":7, "https://www.reddit.com/":1]
}
265 
/++
Reads all the history for all the web browsers.

Calls readHistory once for every member of WebBrowser, in enum order.

Params:
 each_row_cb = The callback to fire for each row in the history.
+/
void readHistoryAll(void delegate(string url, int visit_count) each_row_cb) {
	import std.traits : EnumMembers;

	// Every supported browser, whether or not it is installed
	WebBrowser[] every_browser = [EnumMembers!WebBrowser];

	foreach (current ; every_browser) {
		readHistory(current, each_row_cb);
	}
}
279 
280 ///
unittest {
	import derelict.sqlite3.sqlite : DerelictSQLite3;
	DerelictSQLite3.load();

	// Remember the visit count of every URL that any browser reports
	int[string] data;
	void delegate(string, int) on_row = (string url, int visit_count) {
		data[url] = visit_count;
	};
	web_browser_history.readHistoryAll(on_row);

	// data output
	// ["https://dlang.org/":3, "https://www.google.com/":7, "https://www.reddit.com/":1, "https://slashdot.org/":7]
}
293 
294 
295