From 019b11f670c6463899de76eaa728d0355ebb678a Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Fri, 31 Oct 2014 00:05:25 +0100 Subject: [PATCH] cli: search: Do not output duplicate addresses This filters out duplicate addresses from address outputs (sender, receivers). It also adds tests for the new outputs. The code here is an extended version of a patch from Jani Nikula. --- doc/man1/notmuch-search.rst | 2 + notmuch-search.c | 51 +++++++++++++++++++--- test/T090-search-output.sh | 87 +++++++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+), 6 deletions(-) diff --git a/doc/man1/notmuch-search.rst b/doc/man1/notmuch-search.rst index b6607c92..42f17e4a 100644 --- a/doc/man1/notmuch-search.rst +++ b/doc/man1/notmuch-search.rst @@ -85,6 +85,8 @@ Supported options for **search** include (--format=text0), as a JSON array (--format=json), or as an S-Expression list (--format=sexp). + Duplicate addresses are filtered out. + Note: Searching for **sender** should be much faster than searching for **recipients**, because sender addresses are cached directly in the database whereas other addresses diff --git a/notmuch-search.c b/notmuch-search.c index 671fe413..43d42c60 100644 --- a/notmuch-search.c +++ b/notmuch-search.c @@ -229,6 +229,27 @@ do_search_threads (search_options_t *opt) return 0; } +/* Returns TRUE iff name and addr is duplicate. */ +static notmuch_bool_t +is_duplicate (const search_options_t *opt, GHashTable *addrs, const char *name, const char *addr) +{ + notmuch_bool_t duplicate; + char *key; + + key = talloc_asprintf (opt->format, "%s <%s>", name, addr); + if (! key) + return FALSE; + + duplicate = g_hash_table_lookup_extended (addrs, key, NULL, NULL); + + if (! 
duplicate) + g_hash_table_insert (addrs, key, NULL); + else + talloc_free (key); + + return duplicate; +} + static void print_mailbox (const search_options_t *opt, const mailbox_t *mailbox) { @@ -263,7 +284,8 @@ print_mailbox (const search_options_t *opt, const mailbox_t *mailbox) /* Print addresses from InternetAddressList. */ static void -process_address_list (const search_options_t *opt, InternetAddressList *list) +process_address_list (const search_options_t *opt, GHashTable *addrs, + InternetAddressList *list) { InternetAddress *address; int i; @@ -279,7 +301,7 @@ process_address_list (const search_options_t *opt, InternetAddressList *list) if (group_list == NULL) continue; - process_address_list (opt, group_list); + process_address_list (opt, addrs, group_list); } else { InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address); mailbox_t mbx = { @@ -287,6 +309,9 @@ process_address_list (const search_options_t *opt, InternetAddressList *list) .addr = internet_address_mailbox_get_addr (mailbox), }; + if (is_duplicate (opt, addrs, mbx.name, mbx.addr)) + continue; + print_mailbox (opt, &mbx); } } @@ -294,7 +319,7 @@ process_address_list (const search_options_t *opt, InternetAddressList *list) /* Print addresses from a message header. 
*/ static void -process_address_header (const search_options_t *opt, const char *value) +process_address_header (const search_options_t *opt, GHashTable *addrs, const char *value) { InternetAddressList *list; @@ -305,11 +330,17 @@ process_address_header (const search_options_t *opt, const char *value) if (list == NULL) return; - process_address_list (opt, list); + process_address_list (opt, addrs, list); g_object_unref (list); } +static void +_my_talloc_free_for_g_hash (void *ptr) +{ + talloc_free (ptr); +} + static int do_search_messages (search_options_t *opt) { @@ -317,8 +348,13 @@ do_search_messages (search_options_t *opt) notmuch_messages_t *messages; notmuch_filenames_t *filenames; sprinter_t *format = opt->format; + GHashTable *addresses = NULL; int i; + if (opt->output & OUTPUT_ADDRESS_FLAGS) + addresses = g_hash_table_new_full (g_str_hash, g_str_equal, + _my_talloc_free_for_g_hash, NULL); + if (opt->offset < 0) { opt->offset += notmuch_query_count_messages (opt->query); if (opt->offset < 0) @@ -366,7 +402,7 @@ do_search_messages (search_options_t *opt) const char *addrs; addrs = notmuch_message_get_header (message, "from"); - process_address_header (opt, addrs); + process_address_header (opt, addresses, addrs); } if (opt->output & OUTPUT_RECIPIENTS) { @@ -376,7 +412,7 @@ do_search_messages (search_options_t *opt) for (j = 0; j < ARRAY_SIZE (hdrs); j++) { addrs = notmuch_message_get_header (message, hdrs[j]); - process_address_header (opt, addrs); + process_address_header (opt, addresses, addrs); } } } @@ -384,6 +420,9 @@ do_search_messages (search_options_t *opt) notmuch_message_destroy (message); } + if (addresses) + g_hash_table_unref (addresses); + notmuch_messages_destroy (messages); format->end (format); diff --git a/test/T090-search-output.sh b/test/T090-search-output.sh index 947d572e..82380ac2 100755 --- a/test/T090-search-output.sh +++ b/test/T090-search-output.sh @@ -387,6 +387,93 @@ cat <EXPECTED EOF test_expect_equal_file OUTPUT EXPECTED 
+test_begin_subtest "--output=sender" +notmuch search --output=sender '*' >OUTPUT +cat <EXPECTED +François Boulogne +Olivier Berger +Chris Wilson +Carl Worth +Alexander Botero-Lowry +Keith Packard +Jjgod Jiang +Rolland Santimano +Jan Janak +Stewart Smith +Lars Kellogg-Stedman +Alex Botero-Lowry +Ingmar Vanhassel +Aron Griffis +Adrian Perez de Castro +Israel Herraiz +Mikhail Gusarov +EOF +test_expect_equal_file OUTPUT EXPECTED + +test_begin_subtest "--output=sender --format=json" +notmuch search --output=sender --format=json '*' >OUTPUT +cat <EXPECTED +[{"name": "François Boulogne", "address": "boulogne.f@gmail.com", "name-addr": "François Boulogne "}, +{"name": "Olivier Berger", "address": "olivier.berger@it-sudparis.eu", "name-addr": "Olivier Berger "}, +{"name": "Chris Wilson", "address": "chris@chris-wilson.co.uk", "name-addr": "Chris Wilson "}, +{"name": "Carl Worth", "address": "cworth@cworth.org", "name-addr": "Carl Worth "}, +{"name": "Alexander Botero-Lowry", "address": "alex.boterolowry@gmail.com", "name-addr": "Alexander Botero-Lowry "}, +{"name": "Keith Packard", "address": "keithp@keithp.com", "name-addr": "Keith Packard "}, +{"name": "Jjgod Jiang", "address": "gzjjgod@gmail.com", "name-addr": "Jjgod Jiang "}, +{"name": "Rolland Santimano", "address": "rollandsantimano@yahoo.com", "name-addr": "Rolland Santimano "}, +{"name": "Jan Janak", "address": "jan@ryngle.com", "name-addr": "Jan Janak "}, +{"name": "Stewart Smith", "address": "stewart@flamingspork.com", "name-addr": "Stewart Smith "}, +{"name": "Lars Kellogg-Stedman", "address": "lars@seas.harvard.edu", "name-addr": "Lars Kellogg-Stedman "}, +{"name": "Alex Botero-Lowry", "address": "alex.boterolowry@gmail.com", "name-addr": "Alex Botero-Lowry "}, +{"name": "Ingmar Vanhassel", "address": "ingmar@exherbo.org", "name-addr": "Ingmar Vanhassel "}, +{"name": "Aron Griffis", "address": "agriffis@n01se.net", "name-addr": "Aron Griffis "}, +{"name": "Adrian Perez de Castro", "address": 
"aperez@igalia.com", "name-addr": "Adrian Perez de Castro "}, +{"name": "Israel Herraiz", "address": "isra@herraiz.org", "name-addr": "Israel Herraiz "}, +{"name": "Mikhail Gusarov", "address": "dottedmag@dottedmag.net", "name-addr": "Mikhail Gusarov "}] +EOF +test_expect_equal_file OUTPUT EXPECTED + +test_begin_subtest "--output=recipients" +notmuch search --output=recipients '*' >OUTPUT +cat <EXPECTED +Allan McRae +"Discussion about the Arch User Repository (AUR)" +olivier.berger@it-sudparis.eu +notmuch@notmuchmail.org +notmuch +Keith Packard +Mikhail Gusarov +EOF +test_expect_equal_file OUTPUT EXPECTED + +test_begin_subtest "--output=sender --output=recipients" +notmuch search --output=sender --output=recipients '*' >OUTPUT +cat <EXPECTED +François Boulogne +Allan McRae +"Discussion about the Arch User Repository (AUR)" +Olivier Berger +olivier.berger@it-sudparis.eu +Chris Wilson +notmuch@notmuchmail.org +Carl Worth +Alexander Botero-Lowry +Keith Packard +Jjgod Jiang +Rolland Santimano +Jan Janak +Stewart Smith +Lars Kellogg-Stedman +notmuch +Alex Botero-Lowry +Ingmar Vanhassel +Aron Griffis +Adrian Perez de Castro +Israel Herraiz +Mikhail Gusarov +EOF +test_expect_equal_file OUTPUT EXPECTED + test_begin_subtest "sanitize output for quoted-printable line-breaks in author and subject" add_message "[subject]='two =?ISO-8859-1?Q?line=0A_subject?= headers'" -- 2.39.2