Bug 1368948: Add a script to update PDFium from upstream.
MozReview-Commit-ID: VasYAthlZJ
new file mode 100644
--- /dev/null
+++ b/widget/third_party/pdfium/README_MOZILLA
@@ -0,0 +1,8 @@
+PDFium is a PDF library to view, search, print, and form fill PDF files.
+
+The source in this directory was copied from upstream by running the
+update.py script found alongside this file. Any changes made relative to upstream
+should be reflected in that script, e.g. by applying patch files after the
+copy step.
+
+The upstream repository is https://pdfium.googlesource.com/pdfium
new file mode 100644
--- /dev/null
+++ b/widget/third_party/pdfium/update.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+import argparse
+import os
+import re
+import shutil
+import datetime
+import tarfile
+import urllib
+
+def debug_print(message):
+ if DEBUG:
+ print(message)
+
+def parse_upstream(commit_selector):
+ upstream_url = 'https://pdfium.googlesource.com/pdfium/+/' + commit_selector
+
+ text = urllib.urlopen(upstream_url).readlines()
+ text = "".join(text)
+ tree_regex = '<tr><th class="Metadata-title">tree</th><td><a href="/pdfium/\+/([\w/]+)/">[0-9a-zA-Z]+</a></td></tr>'
+ tree = re.search(tree_regex, text).groups(0)[0].strip()
+ commit_regex = '<tr><th class="Metadata-title">commit</th><td>\W*([0-9a-fA-F]+)\W*</td>.*</tr>'
+ commit = re.search(commit_regex, text).groups(0)[0].strip()
+ date_regex = '<tr><th class="Metadata-title">committer</th><td>.+</td><td>[^\s]+ ([0-9a-zA-Z: ]+)\s*\+*[0-9]*</td></tr>'
+ date = re.search(date_regex, text).groups(0)[0].strip()
+
+ if tree == commit:
+ # commit_selector is the commit hash; there is no meaningful tree information
+ tree = None
+ debug_print('Updating commit %s (%s)' % (commit, date))
+ else:
+ # commit_selector is the tree name
+ debug_print('Updating commit %s (%s) on tree %s' % (commit, date, tree))
+ return (tree, commit, datetime.datetime.strptime(date, "%b %d %H:%M:%S %Y"))
+
+def prepare_upstream(base, commit):
+ upstream_url = 'https://pdfium.googlesource.com/pdfium' + '/+archive/' + commit + '.tar.gz'
+ archive_path = os.path.join(base, 'pdfium.tar.gz')
+ folder_path = os.path.join(base, 'pdfium')
+
+ if os.path.exists(folder_path):
+ debug_print('Removing ' + folder_path)
+ shutil.rmtree(folder_path)
+
+ debug_print('Downloading ' + upstream_url + ' as ' + archive_path)
+ urllib.urlretrieve(upstream_url, archive_path)
+
+ debug_print('Extracting ' + archive_path + ' as ' + folder_path)
+ tarfile.open(archive_path).extractall(path=folder_path)
+
+ debug_print('Removing ' + archive_path)
+ os.remove(archive_path)
+ return commit
+
+def cleanup_upstream(base):
+ upstream_folder = os.path.join(base, 'pdfium')
+
+ # Remove irrelevant source control configurations
+ debug_print('Removing pdfium/.gitignore')
+ os.remove(os.path.join(upstream_folder, '.gitignore'))
+
+ # Gecko uses freetype under /modules/freetype2
+ debug_print('Removing pdfium/third_party/freetype')
+ shutil.rmtree(os.path.join(upstream_folder, 'third_party/freetype'))
+
+ # Gecko uses libjpeg under /media/libjpeg
+ debug_print('Removing pdfium/third_party/libjpeg')
+ shutil.rmtree(os.path.join(upstream_folder, 'third_party/libjpeg'))
+
+ # Gecko uses zlib under /modules/zlib
+ debug_print('Removing pdfium/third_party/zlib_v128')
+ shutil.rmtree(os.path.join(upstream_folder, 'third_party/zlib_v128'))
+
+ # Gecko doesn't use libpng due to XFA features disabled
+ debug_print('Removing pdfium/third_party/libpng16')
+ shutil.rmtree(os.path.join(upstream_folder, 'third_party/libpng16'))
+
+ # Gecko doesn't uses libtiff due to XFA features disabled
+ debug_print('Removing pdfium/third_party/libtiff')
+ shutil.rmtree(os.path.join(upstream_folder, 'third_party/libtiff'))
+
+ # Gecko doesn't uses pymock due to no pymock tests
+ debug_print('Removing pdfium/third_party/pymock')
+ shutil.rmtree(os.path.join(upstream_folder, 'third_party/pymock'))
+
+def apply_patches(base):
+ os.chdir(base)
+ # Apply patches, ex:
+ # os.system("patch -p4 < some.patch")
+ return
+
+def update_readme(base, tree, commit, commitdate):
+ version_prefix = 'The git commit ID last used to import was '
+ version_regex = version_prefix + '[0-9a-fA-F]+ \(.+\)'
+ version_string = version_prefix + '%s (%s)' % (commit, commitdate)
+
+ os.chdir(base)
+
+ with open('README_MOZILLA') as f:
+ debug_print('Reading README_MOZILLA')
+ readme = f.read()
+
+ if version_prefix in readme:
+ new_readme = re.sub(version_regex, version_string, readme)
+ else:
+ new_readme = "%s\n\n%s\n" % (readme.rstrip('\n'), version_string)
+
+ if readme != new_readme:
+ with open('README_MOZILLA', 'w') as f:
+ debug_print('Updating README_MOZILLA')
+ f.write(new_readme)
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Update Tool for PDFium Library')
+ parser.add_argument('--debug', dest='debug', action="store_true",
+ help='show debug messages')
+ parser.add_argument('--commit', dest='commit', type=str, default='master',
+ help='specify a commit hash or a branch name (default: master)')
+
+ args = parser.parse_args()
+ DEBUG = args.debug
+ (TREE, COMMIT, COMMITDATE) = parse_upstream(args.commit)
+ BASE = os.path.dirname(os.path.abspath(__file__))
+
+ prepare_upstream(BASE, COMMIT)
+ apply_patches(BASE)
+ update_readme(BASE, TREE, COMMIT, COMMITDATE)
+ cleanup_upstream(BASE)