diff options
author | Jaakko Keränen <jaakko.keranen@iki.fi> | 2021-10-01 16:29:39 +0300 |
---|---|---|
committer | Jaakko Keränen <jaakko.keranen@iki.fi> | 2021-10-01 16:29:39 +0300 |
commit | b5c1e658fb7cd1e81c512323c6c3fa6dfb565522 (patch) | |
tree | 38d5682b56d7989c85cca519a0d94701eb0055e6 | |
parent | 692db72f23b6157aad2d450ec47410115c1fb76f (diff) |
Feeds: Don't forget entries or their unread status
Three important changes:
1) Visited URLs can be marked as "kept" so they will never be discarded due to old age.
2) Feed entries are discarded from the database only after they have been removed from the source and have then become too old (six months).
3) Visited feed entry URLs are always flagged as kept, so the (un)read status will not be forgotten.
-rw-r--r-- | src/feeds.c | 53 | ||||
-rw-r--r-- | src/gmutil.c | 10 | ||||
-rw-r--r-- | src/gmutil.h | 1 | ||||
-rw-r--r-- | src/ui/sidebarwidget.c | 2 | ||||
-rw-r--r-- | src/visited.c | 39 | ||||
-rw-r--r-- | src/visited.h | 5 |
6 files changed, 96 insertions, 14 deletions
diff --git a/src/feeds.c b/src/feeds.c index 86f33367..3417b4e9 100644 --- a/src/feeds.c +++ b/src/feeds.c | |||
@@ -317,10 +317,10 @@ static iBool updateEntries_Feeds_(iFeeds *d, iBool isHeadings, uint32_t sourceId | |||
317 | /* Entries are removed from `incoming` if they are added to the Feeds entries array. | 317 | /* Entries are removed from `incoming` if they are added to the Feeds entries array. |
318 | Anything remaining in `incoming` will be deleted afterwards. */ | 318 | Anything remaining in `incoming` will be deleted afterwards. */ |
319 | iBool gotNew = iFalse; | 319 | iBool gotNew = iFalse; |
320 | lock_Mutex(d->mtx); | ||
321 | iTime now; | 320 | iTime now; |
322 | initCurrent_Time(&now); | 321 | initCurrent_Time(&now); |
323 | if (isHeadings) { | 322 | if (isHeadings) { |
323 | lock_Mutex(d->mtx); | ||
324 | // printf("Updating sourceID %d...\n", sourceId); | 324 | // printf("Updating sourceID %d...\n", sourceId); |
325 | iStringSet *known = listHeadingEntriesFrom_Feeds_(d, sourceId); | 325 | iStringSet *known = listHeadingEntriesFrom_Feeds_(d, sourceId); |
326 | // puts(" Known URLs:"); | 326 | // puts(" Known URLs:"); |
@@ -357,16 +357,22 @@ static iBool updateEntries_Feeds_(iFeeds *d, iBool isHeadings, uint32_t sourceId | |||
357 | // puts("Done."); | 357 | // puts("Done."); |
358 | iRelease(presentInSource); | 358 | iRelease(presentInSource); |
359 | iRelease(known); | 359 | iRelease(known); |
360 | unlock_Mutex(d->mtx); | ||
360 | } | 361 | } |
361 | else { | 362 | else { |
363 | /* All visited URLs still present in the source should be kept indefinitely so their | ||
364 | read status remains correct. The Kept flag will be cleared after the URL has been | ||
365 | discarded from the entry database and enough time has passed. */ { | ||
366 | // printf("updating entries from %d:\n", sourceId); | ||
367 | iForEach(PtrArray, i, incoming) { | ||
368 | const iFeedEntry *entry = i.ptr; | ||
369 | // printf("marking as kept: {%s}\n", cstr_String(&entry->url)); | ||
370 | setUrlKept_Visited(visited_App(), &entry->url, iTrue); | ||
371 | } | ||
372 | } | ||
373 | lock_Mutex(d->mtx); | ||
362 | iForEach(PtrArray, i, incoming) { | 374 | iForEach(PtrArray, i, incoming) { |
363 | iFeedEntry *entry = i.ptr; | 375 | iFeedEntry *entry = i.ptr; |
364 | /* Disregard old incoming entries. */ | ||
365 | if (secondsSince_Time(&now, &entry->posted) >= maxAge_Visited) { | ||
366 | /* We don't remember this far back, so the unread status of the entry would | ||
367 | be incorrect. */ | ||
368 | continue; | ||
369 | } | ||
370 | size_t pos; | 376 | size_t pos; |
371 | if (locate_SortedArray(&d->entries, &entry, &pos)) { | 377 | if (locate_SortedArray(&d->entries, &entry, &pos)) { |
372 | iFeedEntry *existing = *(iFeedEntry **) at_SortedArray(&d->entries, pos); | 378 | iFeedEntry *existing = *(iFeedEntry **) at_SortedArray(&d->entries, pos); |
@@ -384,7 +390,8 @@ static iBool updateEntries_Feeds_(iFeeds *d, iBool isHeadings, uint32_t sourceId | |||
384 | changed = iTrue; | 390 | changed = iTrue; |
385 | } | 391 | } |
386 | set_String(&existing->title, &entry->title); | 392 | set_String(&existing->title, &entry->title); |
387 | existing->posted = entry->posted; | 393 | existing->posted = entry->posted; |
394 | existing->discovered = entry->discovered; /* prevent discarding */ | ||
388 | delete_FeedEntry(entry); | 395 | delete_FeedEntry(entry); |
389 | if (changed) { | 396 | if (changed) { |
390 | /* TODO: better to use a new flag for read feed entries? */ | 397 | /* TODO: better to use a new flag for read feed entries? */ |
@@ -398,8 +405,8 @@ static iBool updateEntries_Feeds_(iFeeds *d, iBool isHeadings, uint32_t sourceId | |||
398 | } | 405 | } |
399 | remove_PtrArrayIterator(&i); | 406 | remove_PtrArrayIterator(&i); |
400 | } | 407 | } |
408 | unlock_Mutex(d->mtx); | ||
401 | } | 409 | } |
402 | unlock_Mutex(d->mtx); | ||
403 | return gotNew; | 410 | return gotNew; |
404 | } | 411 | } |
405 | 412 | ||
@@ -410,6 +417,8 @@ static iThreadResult fetch_Feeds_(iThread *thread) { | |||
410 | iZap(work); | 417 | iZap(work); |
411 | iBool gotNew = iFalse; | 418 | iBool gotNew = iFalse; |
412 | postCommand_App("feeds.update.started"); | 419 | postCommand_App("feeds.update.started"); |
420 | const int totalJobs = size_PtrArray(&d->jobs); | ||
421 | int numFinishedJobs = 0; | ||
413 | while (!d->stopWorker) { | 422 | while (!d->stopWorker) { |
414 | /* Start new jobs. */ | 423 | /* Start new jobs. */ |
415 | iForIndices(i, work) { | 424 | iForIndices(i, work) { |
@@ -420,6 +429,7 @@ static iThreadResult fetch_Feeds_(iThread *thread) { | |||
420 | sleep_Thread(0.5); /* TODO: wait on a Condition so we can exit quickly */ | 429 | sleep_Thread(0.5); /* TODO: wait on a Condition so we can exit quickly */ |
421 | if (d->stopWorker) break; | 430 | if (d->stopWorker) break; |
422 | size_t ongoing = 0; | 431 | size_t ongoing = 0; |
432 | iBool doNotify = iFalse; | ||
423 | iForIndices(i, work) { | 433 | iForIndices(i, work) { |
424 | if (work[i]) { | 434 | if (work[i]) { |
425 | if (isFinished_GmRequest(work[i]->request)) { | 435 | if (isFinished_GmRequest(work[i]->request)) { |
@@ -429,11 +439,15 @@ static iThreadResult fetch_Feeds_(iThread *thread) { | |||
429 | d, work[i]->checkHeadings, work[i]->bookmarkId, &work[i]->results); | 439 | d, work[i]->checkHeadings, work[i]->bookmarkId, &work[i]->results); |
430 | delete_FeedJob(work[i]); | 440 | delete_FeedJob(work[i]); |
431 | work[i] = NULL; | 441 | work[i] = NULL; |
442 | numFinishedJobs++; | ||
443 | doNotify = iTrue; | ||
432 | } | 444 | } |
433 | else if (isTimedOut_FeedJob_(work[i])) { | 445 | else if (isTimedOut_FeedJob_(work[i])) { |
434 | /* Maybe we'll get it next time! */ | 446 | /* Maybe we'll get it next time! */ |
435 | delete_FeedJob(work[i]); | 447 | delete_FeedJob(work[i]); |
436 | work[i] = NULL; | 448 | work[i] = NULL; |
449 | numFinishedJobs++; | ||
450 | doNotify = iTrue; | ||
437 | } | 451 | } |
438 | else { | 452 | else { |
439 | ongoing++; | 453 | ongoing++; |
@@ -441,6 +455,9 @@ static iThreadResult fetch_Feeds_(iThread *thread) { | |||
441 | /* TODO: abort job if it takes too long (> 15 seconds?) */ | 455 | /* TODO: abort job if it takes too long (> 15 seconds?) */ |
442 | } | 456 | } |
443 | } | 457 | } |
458 | if (doNotify) { | ||
459 | postCommandf_App("feeds.update.progress arg:%d total:%d", numFinishedJobs, totalJobs); | ||
460 | } | ||
444 | /* Stop if everything has finished. */ | 461 | /* Stop if everything has finished. */ |
445 | if (ongoing == 0 && isEmpty_PtrArray(&d->jobs)) { | 462 | if (ongoing == 0 && isEmpty_PtrArray(&d->jobs)) { |
446 | break; | 463 | break; |
@@ -448,6 +465,24 @@ static iThreadResult fetch_Feeds_(iThread *thread) { | |||
448 | } | 465 | } |
449 | initCurrent_Time(&d->lastRefreshedAt); | 466 | initCurrent_Time(&d->lastRefreshedAt); |
450 | save_Feeds_(d); | 467 | save_Feeds_(d); |
468 | /* Check if there are visited URLs marked as Kept that can be cleared because they are no | ||
469 | longer present in the database. */ { | ||
470 | iStringSet *knownEntryUrls = new_StringSet(); | ||
471 | lock_Mutex(d->mtx); | ||
472 | iConstForEach(Array, i, &d->entries.values) { | ||
473 | const iFeedEntry *entry = *(const iFeedEntry **) i.value; | ||
474 | insert_StringSet(knownEntryUrls, &entry->url); | ||
475 | } | ||
476 | unlock_Mutex(d->mtx); | ||
477 | iConstForEach(PtrArray, j, listKept_Visited(visited_App())) { | ||
478 | iVisitedUrl *visUrl = j.ptr; | ||
479 | if (!contains_StringSet(knownEntryUrls, &visUrl->url)) { | ||
480 | visUrl->flags &= ~kept_VisitedUrlFlag; | ||
481 | // printf("unkept: {%s}\n", cstr_String(&visUrl->url)); | ||
482 | } | ||
483 | } | ||
484 | iRelease(knownEntryUrls); | ||
485 | } | ||
451 | postCommandf_App("feeds.update.finished arg:%d unread:%zu", gotNew ? 1 : 0, | 486 | postCommandf_App("feeds.update.finished arg:%d unread:%zu", gotNew ? 1 : 0, |
452 | numUnread_Feeds()); | 487 | numUnread_Feeds()); |
453 | return 0; | 488 | return 0; |
diff --git a/src/gmutil.c b/src/gmutil.c index 1d361875..971747d4 100644 --- a/src/gmutil.c +++ b/src/gmutil.c | |||
@@ -137,6 +137,16 @@ void stripDefaultUrlPort_String(iString *d) { | |||
137 | } | 137 | } |
138 | } | 138 | } |
139 | 139 | ||
140 | const iString *urlQueryStripped_String(const iString *url) { | ||
141 | size_t pos = indexOfCStr_String(url, "?"); | ||
142 | if (pos != iInvalidPos) { | ||
143 | iString *stripped = collect_String(copy_String(url)); | ||
144 | truncate_Block(&stripped->chars, pos); | ||
145 | return stripped; | ||
146 | } | ||
147 | return url; | ||
148 | } | ||
149 | |||
140 | iBool isDataUrl_String(const iString *d) { | 150 | iBool isDataUrl_String(const iString *d) { |
141 | return startsWithCase_String(d, "data:"); | 151 | return startsWithCase_String(d, "data:"); |
142 | } | 152 | } |
diff --git a/src/gmutil.h b/src/gmutil.h index 3c10d45b..c65a17e6 100644 --- a/src/gmutil.h +++ b/src/gmutil.h | |||
@@ -123,6 +123,7 @@ void punyEncodeDomain_Rangecc(iRangecc domain, iString *encoded_out); | |||
123 | void punyEncodeUrlHost_String(iString *absoluteUrl); | 123 | void punyEncodeUrlHost_String(iString *absoluteUrl); |
124 | void stripDefaultUrlPort_String(iString *); | 124 | void stripDefaultUrlPort_String(iString *); |
125 | const iString * urlFragmentStripped_String(const iString *); | 125 | const iString * urlFragmentStripped_String(const iString *); |
126 | const iString * urlQueryStripped_String (const iString *); | ||
126 | void urlDecodePath_String (iString *); | 127 | void urlDecodePath_String (iString *); |
127 | void urlEncodePath_String (iString *); | 128 | void urlEncodePath_String (iString *); |
128 | iString * makeFileUrl_String (const iString *localFilePath); | 129 | iString * makeFileUrl_String (const iString *localFilePath); |
diff --git a/src/ui/sidebarwidget.c b/src/ui/sidebarwidget.c index 4b772eeb..fcdb5b9e 100644 --- a/src/ui/sidebarwidget.c +++ b/src/ui/sidebarwidget.c | |||
@@ -1463,7 +1463,7 @@ static iBool processEvent_SidebarWidget_(iSidebarWidget *d, const SDL_Event *ev) | |||
1463 | removeUrl_Visited(vis, url); | 1463 | removeUrl_Visited(vis, url); |
1464 | } | 1464 | } |
1465 | else { | 1465 | else { |
1466 | visitUrl_Visited(vis, url, transient_VisitedUrlFlag); | 1466 | visitUrl_Visited(vis, url, transient_VisitedUrlFlag | kept_VisitedUrlFlag); |
1467 | } | 1467 | } |
1468 | postCommand_App("visited.changed"); | 1468 | postCommand_App("visited.changed"); |
1469 | return iTrue; | 1469 | return iTrue; |
diff --git a/src/visited.c b/src/visited.c index e9f691c6..466add5b 100644 --- a/src/visited.c +++ b/src/visited.c | |||
@@ -109,11 +109,13 @@ void load_Visited(iVisited *d, const char *dirPath) { | |||
109 | const char *urlStart = skipSpace_CStr(endp); | 109 | const char *urlStart = skipSpace_CStr(endp); |
110 | iVisitedUrl item; | 110 | iVisitedUrl item; |
111 | item.when.ts = (struct timespec){ .tv_sec = ts }; | 111 | item.when.ts = (struct timespec){ .tv_sec = ts }; |
112 | if (secondsSince_Time(&now, &item.when) > maxAge_Visited) { | 112 | if (~flags & kept_VisitedUrlFlag && |
113 | secondsSince_Time(&now, &item.when) > maxAge_Visited) { | ||
113 | continue; /* Too old. */ | 114 | continue; /* Too old. */ |
114 | } | 115 | } |
115 | item.flags = flags; | 116 | item.flags = flags; |
116 | initRange_String(&item.url, (iRangecc){ urlStart, line.end }); | 117 | initRange_String(&item.url, (iRangecc){ urlStart, line.end }); |
118 | set_String(&item.url, &item.url); | ||
117 | insert_SortedArray(&d->visited, &item); | 119 | insert_SortedArray(&d->visited, &item); |
118 | } | 120 | } |
119 | unlock_Mutex(d->mtx); | 121 | unlock_Mutex(d->mtx); |
@@ -153,6 +155,9 @@ void visitUrl_Visited(iVisited *d, const iString *url, uint16_t visitFlags) { | |||
153 | lock_Mutex(d->mtx); | 155 | lock_Mutex(d->mtx); |
154 | if (locate_SortedArray(&d->visited, &visit, &pos)) { | 156 | if (locate_SortedArray(&d->visited, &visit, &pos)) { |
155 | iVisitedUrl *old = at_SortedArray(&d->visited, pos); | 157 | iVisitedUrl *old = at_SortedArray(&d->visited, pos); |
158 | if (old->flags & kept_VisitedUrlFlag) { | ||
159 | visitFlags |= kept_VisitedUrlFlag; /* must continue to be kept */ | ||
160 | } | ||
156 | if (cmpNewer_VisitedUrl_(&visit, old)) { | 161 | if (cmpNewer_VisitedUrl_(&visit, old)) { |
157 | old->when = visit.when; | 162 | old->when = visit.when; |
158 | old->flags = visitFlags; | 163 | old->flags = visitFlags; |
@@ -165,6 +170,21 @@ void visitUrl_Visited(iVisited *d, const iString *url, uint16_t visitFlags) { | |||
165 | unlock_Mutex(d->mtx); | 170 | unlock_Mutex(d->mtx); |
166 | } | 171 | } |
167 | 172 | ||
173 | void setUrlKept_Visited(iVisited *d, const iString *url, iBool isKept) { | ||
174 | if (isEmpty_String(url)) return; | ||
175 | iVisitedUrl visit; | ||
176 | init_VisitedUrl(&visit); | ||
177 | set_String(&visit.url, canonicalUrl_String(url)); | ||
178 | size_t pos; | ||
179 | lock_Mutex(d->mtx); | ||
180 | if (locate_SortedArray(&d->visited, &visit, &pos)) { | ||
181 | iVisitedUrl *vis = at_SortedArray(&d->visited, pos); | ||
182 | iChangeFlags(vis->flags, kept_VisitedUrlFlag, isKept); | ||
183 | } | ||
184 | unlock_Mutex(d->mtx); | ||
185 | deinit_VisitedUrl(&visit); | ||
186 | } | ||
187 | |||
168 | void removeUrl_Visited(iVisited *d, const iString *url) { | 188 | void removeUrl_Visited(iVisited *d, const iString *url) { |
169 | url = canonicalUrl_String(url); | 189 | url = canonicalUrl_String(url); |
170 | iGuardMutex(d->mtx, { | 190 | iGuardMutex(d->mtx, { |
@@ -183,7 +203,7 @@ iTime urlVisitTime_Visited(const iVisited *d, const iString *url) { | |||
183 | iVisitedUrl item; | 203 | iVisitedUrl item; |
184 | size_t pos; | 204 | size_t pos; |
185 | iZap(item); | 205 | iZap(item); |
186 | initCopy_String(&item.url, url); | 206 | initCopy_String(&item.url, canonicalUrl_String(url)); |
187 | lock_Mutex(d->mtx); | 207 | lock_Mutex(d->mtx); |
188 | if (locate_SortedArray(&d->visited, &item, &pos)) { | 208 | if (locate_SortedArray(&d->visited, &item, &pos)) { |
189 | item.when = ((const iVisitedUrl *) constAt_SortedArray(&d->visited, pos))->when; | 209 | item.when = ((const iVisitedUrl *) constAt_SortedArray(&d->visited, pos))->when; |
@@ -203,7 +223,7 @@ static int cmpWhenDescending_VisitedUrlPtr_(const void *a, const void *b) { | |||
203 | return -cmp_Time(&s->when, &t->when); | 223 | return -cmp_Time(&s->when, &t->when); |
204 | } | 224 | } |
205 | 225 | ||
206 | const iArray *list_Visited(const iVisited *d, size_t count) { | 226 | const iPtrArray *list_Visited(const iVisited *d, size_t count) { |
207 | iPtrArray *urls = collectNew_PtrArray(); | 227 | iPtrArray *urls = collectNew_PtrArray(); |
208 | iGuardMutex(d->mtx, { | 228 | iGuardMutex(d->mtx, { |
209 | iConstForEach(Array, i, &d->visited.values) { | 229 | iConstForEach(Array, i, &d->visited.values) { |
@@ -219,3 +239,16 @@ const iArray *list_Visited(const iVisited *d, size_t count) { | |||
219 | } | 239 | } |
220 | return urls; | 240 | return urls; |
221 | } | 241 | } |
242 | |||
243 | const iPtrArray *listKept_Visited(const iVisited *d) { | ||
244 | iPtrArray *urls = collectNew_PtrArray(); | ||
245 | iGuardMutex(d->mtx, { | ||
246 | iConstForEach(Array, i, &d->visited.values) { | ||
247 | const iVisitedUrl *vis = i.value; | ||
248 | if (vis->flags & kept_VisitedUrlFlag) { | ||
249 | pushBack_PtrArray(urls, vis); | ||
250 | } | ||
251 | } | ||
252 | }); | ||
253 | return urls; | ||
254 | } | ||
diff --git a/src/visited.h b/src/visited.h index fec0b4c3..1f2d4fcf 100644 --- a/src/visited.h +++ b/src/visited.h | |||
@@ -41,6 +41,7 @@ struct Impl_VisitedUrl { | |||
41 | 41 | ||
42 | enum iVisitedUrlFlag { | 42 | enum iVisitedUrlFlag { |
43 | transient_VisitedUrlFlag = 0x1, /* redirected; don't show in history */ | 43 | transient_VisitedUrlFlag = 0x1, /* redirected; don't show in history */ |
44 | kept_VisitedUrlFlag = 0x2, /* don't discard this even after max age */ | ||
44 | }; | 45 | }; |
45 | 46 | ||
46 | iDeclareType(Visited) | 47 | iDeclareType(Visited) |
@@ -52,7 +53,9 @@ void save_Visited (const iVisited *, const char *dirPath); | |||
52 | 53 | ||
53 | iTime urlVisitTime_Visited (const iVisited *, const iString *url); | 54 | iTime urlVisitTime_Visited (const iVisited *, const iString *url); |
54 | void visitUrl_Visited (iVisited *, const iString *url, uint16_t visitFlags); /* adds URL to the visited URLs set */ | 55 | void visitUrl_Visited (iVisited *, const iString *url, uint16_t visitFlags); /* adds URL to the visited URLs set */ |
56 | void setUrlKept_Visited (iVisited *, const iString *url, iBool isKept); /* URL is marked as (non)discardable */ | ||
55 | void removeUrl_Visited (iVisited *, const iString *url); | 57 | void removeUrl_Visited (iVisited *, const iString *url); |
56 | iBool containsUrl_Visited (const iVisited *, const iString *url); | 58 | iBool containsUrl_Visited (const iVisited *, const iString *url); |
57 | 59 | ||
58 | const iPtrArray * list_Visited (const iVisited *, size_t count); /* returns collected */ | 60 | const iPtrArray * list_Visited (const iVisited *, size_t count); /* returns collected */ |
61 | const iPtrArray * listKept_Visited (const iVisited *); | ||