summaryrefslogtreecommitdiff
path: root/kde-frameworks/kfilemetadata/files/kfilemetadata-5.26.0-epubextractor-segfault.patch
blob: b738d1ae09ac7b629ed5a9061916999546d5e484 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
From: Christoph Cullmann <cullmann@kde.org>
Date: Sun, 11 Sep 2016 17:14:51 +0000
Subject: Improve epub extractor, less segfaults
X-Git-Url: http://quickgit.kde.org/?p=kfilemetadata.git&a=commitdiff&h=47f6e57b2fa3768feb4f1f4a2cd3ce46660d90f2
---
Improve epub extractor, less segfaults

Improve epub extractor:

1) check for more null pointers (e.g. data can be null for some fields, iterators can be null, ...)
2) actually close the epub file again once extraction is done
3) the iterator seems to free clink itself, as stated in the docs; fix the resulting double free

See e.g. bug 361727, which
could be caused by the double-freed clink in the last iterator.

BUG: 361727
REVIEW: 128888
---


--- a/src/extractors/epubextractor.cpp
+++ b/src/extractors/epubextractor.cpp
@@ -1,5 +1,6 @@
 /*
     Copyright (C) 2013  Vishesh Handa <me@vhanda.in>
+    Copyright (C) 2016  Christoph Cullmann <cullmann@kde.org>
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
@@ -46,11 +47,14 @@
 QString fetchMetadata(struct epub* e, const epub_metadata& type)
 {
     int size = 0;
-
     unsigned char** data = epub_get_metadata(e, type, &size);
     if (data) {
         QStringList strList;
         for (int i = 0; i < size; i++) {
+            // skip nullptr entries, can happen for broken xml files
+            if (!data[i])
+                continue;
+
             strList << QString::fromUtf8((char*)data[i]);
             free(data[i]);
         }
@@ -65,7 +69,8 @@
 
 void EPubExtractor::extract(ExtractionResult* result)
 {
-    struct epub* ePubDoc = epub_open(result->inputUrl().toUtf8().constData(), 1);
+    // open epub, return early on failure; file will be closed again at end of function
+    auto ePubDoc = epub_open(result->inputUrl().toUtf8().constData(), 1);
     if (!ePubDoc) {
         qWarning() << "Invalid document";
         return;
@@ -138,49 +143,49 @@
     //
     // Plain Text
     //
-    if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
-        return;
+    if (result->inputFlags() & ExtractionResult::ExtractPlainText) {
+        if (auto iter = epub_get_iterator(ePubDoc, EITERATOR_SPINE, 0)) {
+            do {
+                char* curr = epub_it_get_curr(iter);
+                if (!curr)
+                    continue;
+
+                QString html = QString::fromUtf8(curr);
+                html.remove(QRegularExpression(QStringLiteral("<[^>]*>")));
+                result->append(html);
+            } while (epub_it_get_next(iter));
+
+            epub_free_iterator(iter);
+        }
+
+        auto tit = epub_get_titerator(ePubDoc, TITERATOR_NAVMAP, 0);
+        if (!tit) {
+            tit = epub_get_titerator(ePubDoc, TITERATOR_GUIDE, 0);
+        }
+        if (tit) {
+            if (epub_tit_curr_valid(tit)) {
+                do {
+                    // get link, iterator handles freeing of it
+                    char* clink = epub_tit_get_curr_link(tit);
+
+                    // epub_get_data returns -1 on failure
+                    char* data = nullptr;
+                    const int size = epub_get_data(ePubDoc, clink, &data);
+                    if (size >= 0 && data) {
+                        QString html = QString::fromUtf8(data, size);
+                        // strip html tags
+                        html.remove(QRegularExpression(QStringLiteral("<[^>]*>")));
+
+                        result->append(html);
+                        free(data);
+                    }
+                } while (epub_tit_next(tit));
+            }
+            epub_free_titerator(tit);
+        }
     }
 
-    struct eiterator* iter = epub_get_iterator(ePubDoc, EITERATOR_SPINE, 0);
-    do {
-        char* curr = epub_it_get_curr(iter);
-        if (!curr)
-            continue;
-        QString html = QString::fromUtf8(curr);
-        html.remove(QRegularExpression(QStringLiteral("<[^>]*>")));
-
-        result->append(html);
-    } while (epub_it_get_next(iter));
-
-    epub_free_iterator(iter);
-
-    struct titerator* tit;
-
-    tit = epub_get_titerator(ePubDoc, TITERATOR_NAVMAP, 0);
-    if (!tit) {
-        tit = epub_get_titerator(ePubDoc, TITERATOR_GUIDE, 0);
-    }
-
-    if (epub_tit_curr_valid(tit)) {
-        do {
-            char* clink = epub_tit_get_curr_link(tit);
-
-            char* data;
-            int size = epub_get_data(ePubDoc, clink, &data);
-            free(clink);
-
-            // epub_get_data returns -1 on failure
-            if (size > 0 && data) {
-                QString html = QString::fromUtf8(data, size);
-                // strip html tags
-                html.remove(QRegularExpression(QStringLiteral("<[^>]*>")));
-
-                result->append(html);
-                free(data);
-            }
-        } while (epub_tit_next(tit));
-    }
-    epub_free_titerator(tit);
+    // close epub file again
+    epub_close(ePubDoc);
 }