Bug 1368948: Add a script to update PDFium from upstream. draft
authorBruce Sun <brsun@mozilla.com>
Wed, 14 Jun 2017 14:09:38 +0800
changeset 593822 119c53bd24d4177444582baff6f1e388d43fb2cd
parent 592321 042829191d730dd6e901d09fb62bbf4274fe5895
child 593823 889177a6f59b780f821c763530a4079e47044b8d
push id63822
push userbmo:brsun@mozilla.com
push dateWed, 14 Jun 2017 06:17:45 +0000
bugs1368948
milestone55.0a1
Bug 1368948: Add a script to update PDFium from upstream. MozReview-Commit-ID: VasYAthlZJ
widget/third_party/pdfium/README_MOZILLA
widget/third_party/pdfium/update.py
new file mode 100644
--- /dev/null
+++ b/widget/third_party/pdfium/README_MOZILLA
@@ -0,0 +1,8 @@
+PDFium is a PDF library to view, search, print, and form fill PDF files.
+
+The source in this directory was copied from upstream by running the
+update.py script from widget/third_party/pdfium. Any changes made relative to
+upstream should be reflected in that script, e.g. by applying patch files
+after the copy step.
+
+The upstream repository is https://pdfium.googlesource.com/pdfium
new file mode 100644
--- /dev/null
+++ b/widget/third_party/pdfium/update.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+import argparse
+import os
+import re
+import shutil
+import datetime
+import tarfile
+import urllib
+
+def debug_print(message):
+    if DEBUG:
+        print(message)
+
+def parse_upstream(commit_selector):
+    upstream_url = 'https://pdfium.googlesource.com/pdfium/+/' + commit_selector
+
+    text = urllib.urlopen(upstream_url).readlines()
+    text = "".join(text)
+    tree_regex = '<tr><th class="Metadata-title">tree</th><td><a href="/pdfium/\+/([\w/]+)/">[0-9a-zA-Z]+</a></td></tr>'
+    tree = re.search(tree_regex, text).groups(0)[0].strip()
+    commit_regex = '<tr><th class="Metadata-title">commit</th><td>\W*([0-9a-fA-F]+)\W*</td>.*</tr>'
+    commit = re.search(commit_regex, text).groups(0)[0].strip()
+    date_regex = '<tr><th class="Metadata-title">committer</th><td>.+</td><td>[^\s]+ ([0-9a-zA-Z: ]+)\s*\+*[0-9]*</td></tr>'
+    date = re.search(date_regex, text).groups(0)[0].strip()
+
+    if tree == commit:
+        # commit_selector is the commit hash; there is no meaningful tree information
+        tree = None
+        debug_print('Updating commit %s (%s)' % (commit, date))
+    else:
+        # commit_selector is the tree name
+        debug_print('Updating commit %s (%s) on tree %s' % (commit, date, tree))
+    return (tree, commit, datetime.datetime.strptime(date, "%b %d %H:%M:%S %Y"))
+
+def prepare_upstream(base, commit):
+    upstream_url = 'https://pdfium.googlesource.com/pdfium' + '/+archive/' + commit + '.tar.gz'
+    archive_path = os.path.join(base, 'pdfium.tar.gz')
+    folder_path = os.path.join(base, 'pdfium')
+
+    if os.path.exists(folder_path):
+        debug_print('Removing ' + folder_path)
+        shutil.rmtree(folder_path)
+
+    debug_print('Downloading ' + upstream_url + ' as ' + archive_path)
+    urllib.urlretrieve(upstream_url, archive_path)
+
+    debug_print('Extracting ' + archive_path + ' as ' + folder_path)
+    tarfile.open(archive_path).extractall(path=folder_path)
+
+    debug_print('Removing ' + archive_path)
+    os.remove(archive_path)
+    return commit
+
+def cleanup_upstream(base):
+    upstream_folder = os.path.join(base, 'pdfium')
+
+    # Remove irrelevant source control configurations
+    debug_print('Removing pdfium/.gitignore')
+    os.remove(os.path.join(upstream_folder, '.gitignore'))
+
+    # Gecko uses freetype under /modules/freetype2
+    debug_print('Removing pdfium/third_party/freetype')
+    shutil.rmtree(os.path.join(upstream_folder, 'third_party/freetype'))
+
+    # Gecko uses libjpeg under /media/libjpeg
+    debug_print('Removing pdfium/third_party/libjpeg')
+    shutil.rmtree(os.path.join(upstream_folder, 'third_party/libjpeg'))
+
+    # Gecko uses zlib under /modules/zlib
+    debug_print('Removing pdfium/third_party/zlib_v128')
+    shutil.rmtree(os.path.join(upstream_folder, 'third_party/zlib_v128'))
+
+    # Gecko doesn't use libpng due to XFA features disabled
+    debug_print('Removing pdfium/third_party/libpng16')
+    shutil.rmtree(os.path.join(upstream_folder, 'third_party/libpng16'))
+
+    # Gecko doesn't uses libtiff due to XFA features disabled
+    debug_print('Removing pdfium/third_party/libtiff')
+    shutil.rmtree(os.path.join(upstream_folder, 'third_party/libtiff'))
+
+    # Gecko doesn't uses pymock due to no pymock tests
+    debug_print('Removing pdfium/third_party/pymock')
+    shutil.rmtree(os.path.join(upstream_folder, 'third_party/pymock'))
+
+def apply_patches(base):
+    os.chdir(base)
+    # Apply patches, ex:
+    # os.system("patch -p4 < some.patch")
+    return
+
+def update_readme(base, tree, commit, commitdate):
+    version_prefix = 'The git commit ID last used to import was '
+    version_regex = version_prefix + '[0-9a-fA-F]+ \(.+\)'
+    version_string = version_prefix + '%s (%s)' % (commit, commitdate)
+
+    os.chdir(base)
+
+    with open('README_MOZILLA') as f:
+        debug_print('Reading README_MOZILLA')
+        readme = f.read()
+
+    if version_prefix in readme:
+        new_readme = re.sub(version_regex, version_string, readme)
+    else:
+        new_readme = "%s\n\n%s\n" % (readme.rstrip('\n'), version_string)
+
+    if readme != new_readme:
+        with open('README_MOZILLA', 'w') as f:
+            debug_print('Updating README_MOZILLA')
+            f.write(new_readme)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Update Tool for PDFium Library')
+    parser.add_argument('--debug', dest='debug', action="store_true",
+                        help='show debug messages')
+    parser.add_argument('--commit', dest='commit', type=str, default='master',
+                        help='specify a commit hash or a branch name (default: master)')
+
+    args = parser.parse_args()
+    DEBUG = args.debug
+    (TREE, COMMIT, COMMITDATE) = parse_upstream(args.commit)
+    BASE = os.path.dirname(os.path.abspath(__file__))
+
+    prepare_upstream(BASE, COMMIT)
+    apply_patches(BASE)
+    update_readme(BASE, TREE, COMMIT, COMMITDATE)
+    cleanup_upstream(BASE)