OverflowError: string longer than 2147483647 bytes when uploading large files with requests

created at 07-04-2021


requests 2.21.0
requests-toolbelt 0.9.1

When uploading files with python requests, the following error is raised:

OverflowError: string longer than 2147483647 bytes

Problem code

    data = {}
    with open("bigfile", "rb") as f:
        r = requests.post(PUBLISH_URL, data=data, files={"xxx": f})

Error message

Traceback (most recent call last):
  File "test.py", line 52, in <module>
  File "test.py", line 49, in main
  File "test.py", line 41, in publish
    r = requests.post(PUBLISH_URL, data=cfg, files={file_key: ("./test.apk", f)})
  File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 116, in post
    return request('post', url, data=data, json=json, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/requests/adapters.py", line 449, in send
  File "/usr/local/lib/python2.7/dist-packages/urllib3/connectionpool.py", line 600, in urlopen
  File "/usr/local/lib/python2.7/dist-packages/urllib3/connectionpool.py", line 354, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/usr/lib/python2.7/httplib.py", line 1057, in request
    self._send_request(method, url, body, headers)
  File "/usr/lib/python2.7/httplib.py", line 1097, in _send_request
  File "/usr/lib/python2.7/httplib.py", line 1053, in endheaders
  File "/usr/lib/python2.7/httplib.py", line 897, in _send_output
  File "/usr/lib/python2.7/httplib.py", line 873, in send
  File "/usr/lib/python2.7/ssl.py", line 743, in sendall
    v = self.send(data[count:])
  File "/usr/lib/python2.7/ssl.py", line 709, in send
    v = self._sslobj.write(data)
OverflowError: string longer than 2147483647 bytes
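
The byte count in the message is not arbitrary: 2147483647 is 2**31 - 1, the largest value of a signed 32-bit C int. Python 2's ssl module passes the buffer length through such an int, so a single write larger than that overflows. A quick check:

```python
# The byte count in the error is the largest signed 32-bit integer,
# 2**31 - 1: the ~2 GB boundary a single SSL write must stay under.
limit = 2**31 - 1
print(limit)  # 2147483647
```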

Analysis process

requests reads the whole file object into memory.
Surprisingly, the implementation in requests is quite crude: it simply calls file.read() on the whole file, so when a large file is sent, memory usage rises sharply. The relevant code (_encode_files in requests/models.py) is as follows:

    def _encode_files(files, data):
        """Build the body for a multipart/form-data request.

        Will successfully encode files when passed as a dict or a list of
        tuples. Order is retained if data is a list of tuples but arbitrary
        if parameters are supplied as a dict.
        The tuples may be 2-tuples (filename, fileobj), 3-tuples (filename, fileobj, contentype)
        or 4-tuples (filename, fileobj, contentype, custom_headers).
        """
        if (not files):
            raise ValueError("Files must be provided.")
        elif isinstance(data, basestring):
            raise ValueError("Data must not be a string.")

        new_fields = []
        fields = to_key_val_list(data or {})
        files = to_key_val_list(files or {})

        for field, val in fields:
            if isinstance(val, basestring) or not hasattr(val, '__iter__'):
                val = [val]
            for v in val:
                if v is not None:
                    # Don't call str() on bytestrings: in Py3 it all goes wrong.
                    if not isinstance(v, bytes):
                        v = str(v)

                    new_fields.append(
                        (field.decode('utf-8') if isinstance(field, bytes) else field,
                         v.encode('utf-8') if isinstance(v, str) else v))

        for (k, v) in files:
            # support for explicit filename
            ft = None
            fh = None
            if isinstance(v, (tuple, list)):
                if len(v) == 2:
                    fn, fp = v
                elif len(v) == 3:
                    fn, fp, ft = v
                else:
                    fn, fp, ft, fh = v
            else:
                fn = guess_filename(v) or k
                fp = v

            if isinstance(fp, (str, bytes, bytearray)):
                fdata = fp
            elif hasattr(fp, 'read'):
                fdata = fp.read()  # <-- the whole file is read into memory here
            elif fp is None:
                continue
            else:
                fdata = fp

            rf = RequestField(name=k, data=fdata, filename=fn, headers=fh)
            rf.make_multipart(content_type=ft)
            new_fields.append(rf)

        body, content_type = encode_multipart_formdata(new_fields)

        return body, content_type
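
The fp.read() call above is the root cause: with no size argument, read() returns the entire file contents as one bytes object, which then becomes part of a single in-memory request body. A minimal illustration, using io.BytesIO as a stand-in for the opened file:

```python
import io

# Stand-in for the uploaded file; a real upload would be an open file on disk.
fake_file = io.BytesIO(b"x" * 1024)

# This mirrors what _encode_files does: one read() call pulls
# the whole payload into memory as a single bytes object.
fdata = fake_file.read()
print(len(fdata))  # 1024
```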

The official documentation recommends using requests-toolbelt

    from requests_toolbelt import MultipartEncoder

    data = {}
    with open("bigfile", "rb") as f:
        data["xxx"] = ("filename", f)
        m = MultipartEncoder(fields=data)
        r = requests.post(PUBLISH_URL, data=m, headers={'Content-Type': m.content_type})


To summarize: requests builds the entire multipart body in memory before sending it, and if the buffer handed to a single SSL write exceeds 2 GB, the OverflowError above is raised. The official documentation recommends requests-toolbelt, whose MultipartEncoder streams the upload instead of buffering it.
Multipart/chunked upload is of course also a solution (if the server supports it).
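
If requests-toolbelt is not available and the server accepts a plain (non-multipart) body, requests can also stream any iterable of bytes using chunked transfer encoding. A sketch of the chunking half, assuming a hypothetical UPLOAD_URL and a server that accepts a raw body (neither comes from the article):

```python
def read_in_chunks(fileobj, chunk_size=1024 * 1024):
    """Yield the file in chunk_size pieces so it is never held in memory whole."""
    while True:
        chunk = fileobj.read(chunk_size)
        if not chunk:
            break
        yield chunk

# Hypothetical usage -- passing a generator as `data` makes requests send
# the body with chunked transfer encoding instead of buffering it:
#     with open("bigfile", "rb") as f:
#         r = requests.post(UPLOAD_URL, data=read_in_chunks(f))
```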

edited at: 07-05-2021