From 7b259272ce48f6946e853232a9af551413a97b8d Mon Sep 17 00:00:00 2001 From: Benjamin Lee Date: Tue, 11 Jun 2024 16:33:55 +0200 Subject: [PATCH] fix: do not return redacted events from search --- src/api/client_server/search.rs | 11 +++--- src/database/key_value/rooms/search.rs | 52 +++++++++++++++++--------- src/service/pdu.rs | 17 +++++++++ src/service/rooms/search/data.rs | 2 + src/service/rooms/search/mod.rs | 10 +++++ src/service/rooms/timeline/mod.rs | 25 +++++++++++-- 6 files changed, 92 insertions(+), 25 deletions(-) diff --git a/src/api/client_server/search.rs b/src/api/client_server/search.rs index d8b58423..bf31fb4d 100644 --- a/src/api/client_server/search.rs +++ b/src/api/client_server/search.rs @@ -89,11 +89,12 @@ pub async fn search_events_route( .get_pdu_from_id(result) .ok()? .filter(|pdu| { - services() - .rooms - .state_accessor - .user_can_see_event(sender_user, &pdu.room_id, &pdu.event_id) - .unwrap_or(false) + !pdu.is_redacted() + && services() + .rooms + .state_accessor + .user_can_see_event(sender_user, &pdu.room_id, &pdu.event_id) + .unwrap_or(false) }) .map(|pdu| pdu.to_room_event()) }) diff --git a/src/database/key_value/rooms/search.rs b/src/database/key_value/rooms/search.rs index ad573f06..8a2769bd 100644 --- a/src/database/key_value/rooms/search.rs +++ b/src/database/key_value/rooms/search.rs @@ -2,24 +2,46 @@ use ruma::RoomId; use crate::{database::KeyValueDatabase, service, services, utils, Result}; +/// Splits a string into tokens used as keys in the search inverted index +/// +/// This may be used to tokenize both message bodies (for indexing) or search +/// queries (for querying). +fn tokenize(body: &str) -> impl Iterator + '_ { + body.split_terminator(|c: char| !c.is_alphanumeric()) + .filter(|s| !s.is_empty()) + .filter(|word| word.len() <= 50) + .map(str::to_lowercase) +} + impl service::rooms::search::Data for KeyValueDatabase { fn index_pdu<'a>(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()> { - let mut batch = message_body - .split_terminator(|c: char| !c.is_alphanumeric()) - .filter(|s| !s.is_empty()) - .filter(|word| word.len() <= 50) - .map(str::to_lowercase) - .map(|word| { - let mut key = shortroomid.to_be_bytes().to_vec(); - key.extend_from_slice(word.as_bytes()); - key.push(0xff); - key.extend_from_slice(pdu_id); // TODO: currently we save the room id a second time here - (key, Vec::new()) - }); + let mut batch = tokenize(message_body).map(|word| { + let mut key = shortroomid.to_be_bytes().to_vec(); + key.extend_from_slice(word.as_bytes()); + key.push(0xff); + key.extend_from_slice(pdu_id); // TODO: currently we save the room id a second time here + (key, Vec::new()) + }); self.tokenids.insert_batch(&mut batch) } + fn deindex_pdu(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()> { + let batch = tokenize(message_body).map(|word| { + let mut key = shortroomid.to_be_bytes().to_vec(); + key.extend_from_slice(word.as_bytes()); + key.push(0xFF); + key.extend_from_slice(pdu_id); // TODO: currently we save the room id a second time here + key + }); + + for token in batch { + self.tokenids.remove(&token)?; + } + + Ok(()) + } + fn search_pdus<'a>( &'a self, room_id: &RoomId, @@ -33,11 +55,7 @@ impl service::rooms::search::Data for KeyValueDatabase { .to_be_bytes() .to_vec(); - let words: Vec<_> = search_string - .split_terminator(|c: char| !c.is_alphanumeric()) - .filter(|s| !s.is_empty()) - .map(str::to_lowercase) - .collect(); + let words: Vec<_> = tokenize(search_string).collect(); let iterators = words.clone().into_iter().map(move |word| { let mut prefix2 = prefix.clone(); diff --git a/src/service/pdu.rs b/src/service/pdu.rs index a51d7ec5..6991a083 100644 --- a/src/service/pdu.rs +++ b/src/service/pdu.rs @@ -72,6 +72,23 @@ impl PduEvent { Ok(()) } + pub fn is_redacted(&self) -> bool { + #[derive(Deserialize)] + struct ExtractRedactedBecause { + redacted_because: Option, + } + + let Some(unsigned) = &self.unsigned else { + return false; + }; + + let Ok(unsigned) = ExtractRedactedBecause::deserialize(&**unsigned) else { + return false; + }; + + unsigned.redacted_because.is_some() + } + pub fn remove_transaction_id(&mut self) -> crate::Result<()> { if let Some(unsigned) = &self.unsigned { let mut unsigned: BTreeMap> = diff --git a/src/service/rooms/search/data.rs b/src/service/rooms/search/data.rs index 7ea7e3d1..7dbfd56a 100644 --- a/src/service/rooms/search/data.rs +++ b/src/service/rooms/search/data.rs @@ -4,6 +4,8 @@ use ruma::RoomId; pub trait Data: Send + Sync { fn index_pdu(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()>; + fn deindex_pdu(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()>; + #[allow(clippy::type_complexity)] fn search_pdus<'a>( &'a self, diff --git a/src/service/rooms/search/mod.rs b/src/service/rooms/search/mod.rs index b6f35e79..3b9de19f 100644 --- a/src/service/rooms/search/mod.rs +++ b/src/service/rooms/search/mod.rs @@ -15,6 +15,16 @@ impl Service { self.db.index_pdu(shortroomid, pdu_id, message_body) } + #[tracing::instrument(skip(self))] + pub fn deindex_pdu<'a>( + &self, + shortroomid: u64, + pdu_id: &[u8], + message_body: &str, + ) -> Result<()> { + self.db.deindex_pdu(shortroomid, pdu_id, message_body) + } + #[tracing::instrument(skip(self))] pub fn search_pdus<'a>( &'a self, diff --git a/src/service/rooms/timeline/mod.rs b/src/service/rooms/timeline/mod.rs index 6fd86d4e..9405f00d 100644 --- a/src/service/rooms/timeline/mod.rs +++ b/src/service/rooms/timeline/mod.rs @@ -399,7 +399,7 @@ impl Service { &pdu.room_id, false, )? { - self.redact_pdu(redact_id, pdu)?; + self.redact_pdu(redact_id, pdu, shortroomid)?; } } } @@ -416,7 +416,7 @@ impl Service { &pdu.room_id, false, )? { - self.redact_pdu(redact_id, pdu)?; + self.redact_pdu(redact_id, pdu, shortroomid)?; } } } @@ -1100,14 +1100,33 @@ impl Service { /// Replace a PDU with the redacted form. #[tracing::instrument(skip(self, reason))] - pub fn redact_pdu(&self, event_id: &EventId, reason: &PduEvent) -> Result<()> { + pub fn redact_pdu( + &self, + event_id: &EventId, + reason: &PduEvent, + shortroomid: u64, + ) -> Result<()> { // TODO: Don't reserialize, keep original json if let Some(pdu_id) = self.get_pdu_id(event_id)? { let mut pdu = self .get_pdu_from_id(&pdu_id)? .ok_or_else(|| Error::bad_database("PDU ID points to invalid PDU."))?; + + #[derive(Deserialize)] + struct ExtractBody { + body: String, + } + + if let Ok(content) = serde_json::from_str::(pdu.content.get()) { + services() + .rooms + .search + .deindex_pdu(shortroomid, &pdu_id, &content.body)?; + } + let room_version_id = services().rooms.state.get_room_version(&pdu.room_id)?; pdu.redact(room_version_id, reason)?; + self.replace_pdu( &pdu_id, &utils::to_canonical_object(&pdu).expect("PDU is an object"),