From 75920321062d682437f3fb0319dad227d8b18f6c Mon Sep 17 00:00:00 2001 From: Gabriele Musco Date: Sat, 25 Mar 2023 14:13:44 +0100 Subject: [PATCH] add author extraction for feed item --- src/feed_item.cpp | 12 +++++++++--- src/feed_item.hpp | 14 ++++++++++++++ src/pybind.cpp | 4 ++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/feed_item.cpp b/src/feed_item.cpp index a08cd71..e0dbb8c 100644 --- a/src/feed_item.cpp +++ b/src/feed_item.cpp @@ -1,5 +1,5 @@ -#include "feed_item.hpp" -#include "utils.hpp" +#include "./feed_item.hpp" +#include "./utils.hpp" std::string FeedItem::extract_url() { std::string res = item_node.child("link").text().as_string(); @@ -75,6 +75,10 @@ void FeedItem::parse() { // pub_date pub_date = SynDomUtils::extract_from_node(item_node, __PUB_DATE_PARAMS); + // author + author_name = SynDomUtils::extract_from_node(item_node, __AUTHOR_NAME_PARAMS); + author_url = SynDomUtils::extract_from_node(item_node, __AUTHOR_URL_PARAMS); + // img_url img_url = extract_img_url(); fix_url(img_url); @@ -87,6 +91,8 @@ std::string FeedItem::to_json() { " \"url\": \"" + url + "\",\n" " \"media_url\": \"" + media_url + "\",\n" " \"pub_date\": \"" + pub_date + "\",\n" - " \"img_url\": \"" + img_url + "\"\n" + " \"img_url\": \"" + img_url + "\",\n" + " \"author_name\": \"" + author_name + "\",\n" + " \"author_url\": \"" + author_url + "\"\n" " }"; } diff --git a/src/feed_item.hpp b/src/feed_item.hpp index 5d7105e..fd1259f 100644 --- a/src/feed_item.hpp +++ b/src/feed_item.hpp @@ -28,6 +28,8 @@ private: std::string media_url; std::string pub_date; std::string img_url; + std::string author_name; + std::string author_url; /** * Tries to extract the item url and returns it. @@ -73,6 +75,16 @@ private: {ExtractionParam::ParamType::CHILD, {"date"}}, {ExtractionParam::ParamType::CHILD, {"dc:date"}} }; + static inline const std::vector __AUTHOR_NAME_PARAMS{ + {ExtractionParam::ParamType::CHILD, {"author", "name"}}, + {ExtractionParam::ParamType::CHILD, {"author"}}, + {ExtractionParam::ParamType::CHILD, {"dc:creator"}}, + {ExtractionParam::ParamType::CHILD, {"creator"}}, + {ExtractionParam::ParamType::CHILD, {"itunes:author"}}, + }; + static inline const std::vector __AUTHOR_URL_PARAMS{ + {ExtractionParam::ParamType::CHILD, {"author", "uri"}} + }; /** * Entry point of the class, parses all the relevant content. Called by * the constructor. @@ -101,6 +113,8 @@ public: std::string get_media_url() { return media_url; } std::string get_pub_date() { return pub_date; } std::string get_img_url() { return img_url; } + std::string get_author_name() { return author_name; } + std::string get_author_url() { return author_url; } /** * Represents the FeedItem object (itself) as a json, returned as a string. diff --git a/src/pybind.cpp b/src/pybind.cpp index bef72f9..1d5a58d 100644 --- a/src/pybind.cpp +++ b/src/pybind.cpp @@ -19,11 +19,15 @@ PYBIND11_MODULE(syndom, m) { .def_property_readonly("media_url", &FeedItem::get_media_url) .def_property_readonly("pub_date", &FeedItem::get_pub_date) .def_property_readonly("img_url", &FeedItem::get_img_url) + .def_property_readonly("author_name", &FeedItem::get_author_name) + .def_property_readonly("author_url", &FeedItem::get_author_url) .def("get_title", &FeedItem::get_title) .def("get_content", &FeedItem::get_content) .def("get_url", &FeedItem::get_url) .def("get_media_url", &FeedItem::get_media_url) .def("get_pub_date", &FeedItem::get_pub_date) + .def("get_author_name", &FeedItem::get_author_name) + .def("get_author_url", &FeedItem::get_author_url) .def("get_img_url", &FeedItem::get_img_url); py::class_(m, "Feed") .def(py::init()) -- GitLab