With the exception of Common Lisp, these are the actual implementations of these functions as distributed with each language's runtime or standard library. Hurray for Free Software. Since the CL spec doesn't include a join function, the one included here is my own. Also available is the one in the cl-rms-string library, which is 6 SLOC.
| Language | SLOC |
|---|---|
| Common Lisp | 4-6 |
| Scheme (srfi-13) | 24 |
| PHP | 25 |
| TCL | 34 |
| Ruby | 45 |
| Perl | 46 |
| SWI Prolog | 57 |
| Java | 124 |
| Python | 189 |
(defun intersperse (thing list)
  "Put thing between each of the elements in list.

(intersperse '|,| '(1 2 3)) => (1 |,| 2 |,| 3)"
  ;; MAPCAN builds (thing x1 thing x2 ...); REST drops the leading THING.
  (rest (mapcan (lambda (x) (list thing x)) list)))

(defun intersperse-string (delimiter list)
  "A convenient bare-string output wrapper for intersperse.

Returns a fresh string made of the elements of LIST with DELIMITER
between each adjacent pair; an empty LIST yields the empty string.
DELIMITER and the elements of LIST must be sequences of characters."
  ;; The result is filled in place rather than built with
  ;; (apply #'concatenate 'string ...): APPLY may not be given more than
  ;; CALL-ARGUMENTS-LIMIT arguments, which long lists would exceed.
  (let* ((pieces (intersperse delimiter list))
         (result (make-string (reduce #'+ pieces :key #'length)))
         (position 0))
    (dolist (piece pieces result)
      (replace result piece :start1 position)
      (incf position (length piece)))))
;;; (string-join string-list [delimiter grammar]) => string
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Concatenate the strings of STRING-LIST, inserting DELIMITER as
;;; directed by GRAMMAR.
;;;
;;;   (string-join '("foo" "bar" "baz") ":") => "foo:bar:baz"
;;;
;;; DELIMITER defaults to a single space " ".
;;; GRAMMAR is one of the symbols {prefix, infix, strict-infix, suffix}
;;; and defaults to 'infix:
;;;   infix, strict-infix -- delimiter between adjacent strings
;;;   prefix              -- delimiter before every string
;;;   suffix              -- delimiter after every string
;;; An empty STRING-LIST yields "" for every grammar except
;;; strict-infix, where it is an error.
;;;
;;; Efficiency note: the pieces are consed into a list and handed to
;;; STRING-CONCATENATE.  Precomputing the length of the answer string,
;;; then allocating and filling it iteratively, would be cheaper.

(define (string-join strings . delim+grammar)
  (let-optionals* delim+grammar ((delim " " (string? delim))
                                 (grammar 'infix))
    (letrec ((delimit
              ;; (delimit '(s1 s2 ...) tail) => (delim s1 delim s2 ... . tail)
              (lambda (items tail)
                (if (pair? items)
                    (cons delim (cons (car items) (delimit (cdr items) tail)))
                    tail))))
      (cond
       ((pair? strings)
        (string-concatenate
         (case grammar
           ((infix strict-infix)
            (cons (car strings) (delimit (cdr strings) '())))
           ((prefix)
            (delimit strings '()))
           ((suffix)
            (cons (car strings) (delimit (cdr strings) (list delim))))
           (else
            (error "Illegal join grammar" grammar string-join)))))

       ((not (null? strings))
        (error "STRINGS parameter not list." strings string-join))

       ;; From here on STRINGS is ().
       ((eq? grammar 'strict-infix)
        (error "Empty list cannot be joined with STRICT-INFIX grammar."
               string-join))

       (else "")))))                    ; Special-cased for infix grammar.
/*
 * php_implode -- join the elements of `arr` into one string.
 *
 * Walks the array behind `arr` in iteration order, appending each element's
 * string value to a smart_str and placing the string value of `delim`
 * between consecutive elements (never after the last one).  Elements whose
 * type is not IS_STRING are separated and converted with convert_to_string()
 * before being appended.  An array with no elements produces the empty
 * string.  The result is handed back through `return_value` by the
 * RETURN_* macros.
 */
PHPAPI void php_implode(zval *delim, zval *arr, zval *return_value)
{
    smart_str joined = {0};
    HashPosition cursor;
    zval **elem;
    int total = zend_hash_num_elements(Z_ARRVAL_P(arr));
    int done = 0;

    if (total == 0) {
        RETURN_EMPTY_STRING();
    }

    for (zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), &cursor);
         zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void **) &elem, &cursor) == SUCCESS;
         zend_hash_move_forward_ex(Z_ARRVAL_P(arr), &cursor)) {
        if ((*elem)->type != IS_STRING) {
            SEPARATE_ZVAL(elem);
            convert_to_string(*elem);
        }
        smart_str_appendl(&joined, Z_STRVAL_PP(elem), Z_STRLEN_PP(elem));

        /* The delimiter goes after every element except the last counted one. */
        done++;
        if (done != total) {
            smart_str_appendl(&joined, Z_STRVAL_P(delim), Z_STRLEN_P(delim));
        }
    }

    smart_str_0(&joined);
    /* NOTE(review): the trailing 0 presumably means "do not duplicate the
     * buffer" -- confirm against the RETURN_STRINGL definition. */
    RETURN_STRINGL(joined.c, joined.len, 0);
}
/* *---------------------------------------------------------------------- * * Tcl_JoinObjCmd -- * * This procedure is invoked to process the "join" Tcl command. * See the user documentation for details on what it does. * * Results: * A standard Tcl object result. * * Side effects: * See the user documentation. * *---------------------------------------------------------------------- */ /* ARGSUSED */ int Tcl_JoinObjCmd(dummy, interp, objc, objv) ClientData dummy; /* Not used. */ Tcl_Interp *interp; /* Current interpreter. */ int objc; /* Number of arguments. */ Tcl_Obj *CONST objv[]; /* The argument objects. */ { char *joinString, *bytes; int joinLength, listLen, length, i, result; Tcl_Obj **elemPtrs; Tcl_Obj *resObjPtr; if (objc == 2) { joinString = " "; joinLength = 1; } else if (objc == 3) { joinString = Tcl_GetStringFromObj(objv[2], &joinLength); } else { Tcl_WrongNumArgs(interp, 1, objv, "list ?joinString?"); return TCL_ERROR; } /* * Make sure the list argument is a list object and get its length and * a pointer to its array of element pointers. */ result = Tcl_ListObjGetElements(interp, objv[1], &listLen, &elemPtrs); if (result != TCL_OK) { return result; } /* * Now concatenate strings to form the "joined" result. We append * directly into the interpreter's result object. */ resObjPtr = Tcl_GetObjResult(interp); for (i = 0; i < listLen; i++) { bytes = Tcl_GetStringFromObj(elemPtrs[i], &length); if (i > 0) { Tcl_AppendToObj(resObjPtr, joinString, joinLength); } Tcl_AppendToObj(resObjPtr, bytes, length); } return TCL_OK; }
/*
 * rb_ary_join -- build a string from the elements of ary, with sep
 * between consecutive elements when sep is not nil.
 *
 * An empty array gives a new empty string.  String elements are appended
 * as they are, nested arrays are handed to inspect_join through
 * rb_protect_inspect (or replaced by "[...]" when rb_inspecting_p reports
 * the array is already being processed), and every other element goes
 * through rb_obj_as_string.  The result is tainted when ary, sep, or any
 * appended piece is tainted.
 */
VALUE
rb_ary_join(ary, sep)
    VALUE ary, sep;
{
    long capacity = 1, idx;
    int tainted = Qfalse;
    VALUE joined, elem;

    if (RARRAY(ary)->len == 0) return rb_str_new(0, 0);
    if (OBJ_TAINTED(ary) || OBJ_TAINTED(sep)) tainted = Qtrue;

    /* First pass: estimate the buffer size.  Elements that are not
     * string-like are budgeted at 10 bytes each. */
    for (idx = 0; idx < RARRAY(ary)->len; idx++) {
        elem = rb_check_string_type(RARRAY(ary)->ptr[idx]);
        if (NIL_P(elem)) {
            capacity += 10;
        }
        else {
            capacity += RSTRING(elem)->len;
        }
    }
    if (!NIL_P(sep)) {
        StringValue(sep);
        capacity += RSTRING(sep)->len * (RARRAY(ary)->len - 1);
    }
    joined = rb_str_buf_new(capacity);

    /* Second pass: stringify each element and append it. */
    for (idx = 0; idx < RARRAY(ary)->len; idx++) {
        int kind;

        elem = RARRAY(ary)->ptr[idx];
        kind = TYPE(elem);
        if (kind == T_ARRAY) {
            if (rb_inspecting_p(elem)) {
                elem = rb_str_new2("[...]");
            }
            else {
                VALUE args[2];

                args[0] = elem;
                args[1] = sep;
                elem = rb_protect_inspect(inspect_join, ary, (VALUE)args);
            }
        }
        else if (kind != T_STRING) {
            elem = rb_obj_as_string(elem);
        }

        if (idx > 0 && !NIL_P(sep)) {
            rb_str_buf_append(joined, sep);
        }
        rb_str_buf_append(joined, elem);
        if (OBJ_TAINTED(elem)) tainted = Qtrue;
    }

    if (tainted) OBJ_TAINT(joined);
    return joined;
}
/*
 * Perl_do_join -- set sv to the items mark[1] .. sp[0] concatenated with
 * del between consecutive items.
 *
 * The delimiter is stringified first to learn its length.  If sv's current
 * allocation looks too small, the lengths of the plain (defined, without
 * get/overload magic) items are summed so the buffer can be grown once up
 * front.  sv is then reset to the empty string with its UTF8 flag off, the
 * items are appended with sv_catsv, and set-magic is triggered on sv.
 */
void
Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp)
{
    register I32 items = sp - mark;
    register STRLEN len;
    STRLEN delimlen;

    (void) SvPV_const(del, delimlen); /* stringify and get the delimlen */
    /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */

    mark++;                           /* step onto the first item */

    if (items > 0)
        len = delimlen * (items - 1);
    else
        len = 0;

    (void)SvUPGRADE(sv, SVt_PV);
    if (SvLEN(sv) < len + items) {    /* current length is way too short */
        /* Size the result with a private cursor so that mark and items
         * are left untouched for the append phase below. */
        SV **scan = mark;
        I32 remaining = items;

        for (; remaining > 0; remaining--, scan++) {
            if (*scan && !SvGAMAGIC(*scan) && SvOK(*scan)) {
                STRLEN tmplen;
                SvPV_const(*scan, tmplen);
                len += tmplen;
            }
        }
        SvGROW(sv, len + 1);          /* so try to pre-extend */
    }

    sv_setpvn(sv, "", 0);
    /* sv_setpv retains old UTF8ness [perl #24846] */
    SvUTF8_off(sv);

    if (PL_tainting && SvMAGICAL(sv))
        SvTAINTED_off(sv);

    /* The first item takes no delimiter in front of it. */
    if (items > 0) {
        if (*mark)
            sv_catsv(sv, *mark);
        mark++;
    }
    items--;

    if (delimlen) {
        while (items > 0) {
            sv_catsv(sv, del);
            sv_catsv(sv, *mark);
            items--;
            mark++;
        }
    }
    else {
        /* Empty delimiter: skip the pointless append of nothing. */
        while (items > 0) {
            sv_catsv(sv, *mark);
            items--;
            mark++;
        }
    }
    SvSETMAGIC(sv);
}
/*
 * pl_concat_atom3(list, sep, atom)
 *
 * Walks `list`, collecting the text of each element into a temporary
 * buffer with the text of `sep` (when sep is non-zero) between consecutive
 * elements, and unifies `atom` with the collected text as an atom.
 *
 * When an element has no text representation, or the list does not end in
 * [], the buffer is dropped and the work is handed to split_atom(); its
 * result of -1 becomes a type error, 0 a failure and anything else success.
 * A separator that has no text representation is a type error as well.
 */
word
pl_concat_atom3(term_t list, term_t sep, term_t atom)
{ term_t tail = PL_copy_term_ref(list);
  term_t elem = PL_new_term_ref();
  IOENC enc = ENC_ISO_LATIN_1;
  tmp_buffer buf;
  PL_chars_t septext;			/* separator text */
  PL_chars_t sum;			/* collected result */
  int count;
  int rc;

  if ( sep && !PL_get_text(sep, &septext, CVT_ATOMIC) )
    return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_text, sep);

  initBuffer(&buf);

  for( count = 0; PL_get_list(tail, elem, tail); count++ )
  { PL_chars_t txt;

    if ( !PL_get_text(elem, &txt, CVT_ATOMIC) )
    { discardBuffer(&buf);

      switch(split_atom(list, sep, atom))
      { case -1:
	  return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_text, elem);
	case 0:
	  fail;
	default:
	  succeed;
      }
    }

    /* NOTE(review): enc is passed by address, so the append presumably
       may switch it to a wider encoding -- confirm in
       append_text_to_buffer(). */
    if ( count > 0 && sep )
      append_text_to_buffer((Buffer)&buf, &septext, &enc);
    append_text_to_buffer((Buffer)&buf, &txt, &enc);

    PL_free_text(&txt);
  }

  if ( !PL_get_nil(tail) )		/* not a proper list */
  { discardBuffer(&buf);

    switch(split_atom(list, sep, atom))
    { case -1:
	return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_list, tail);
      case 0:
	fail;
      default:
	succeed;
    }
  }

  sum.encoding  = enc;
  sum.storage   = PL_CHARS_HEAP;
  sum.canonical = TRUE;

  if ( enc == ENC_ISO_LATIN_1 )
  { sum.text.t = baseBuffer(&buf, char);
    sum.length = entriesBuffer(&buf, char);
  } else
  { sum.text.w = baseBuffer(&buf, pl_wchar_t);
    sum.length = entriesBuffer(&buf, pl_wchar_t);
  }

  rc = PL_unify_text(atom, 0, &sum, PL_ATOM);
  discardBuffer(&buf);

  return rc;
}
// Joining //----------------------------------------------------------------------- /** * <p>Concatenates elements of an array into a single String. * Null objects or empty strings within the array are represented by * empty strings.</p> * * <pre> * StringUtils.concatenate(null) = null * StringUtils.concatenate([]) = "" * StringUtils.concatenate([null]) = "" * StringUtils.concatenate(["a", "b", "c"]) = "abc" * StringUtils.concatenate([null, "", "a"]) = "a" * </pre> * * @param array the array of values to concatenate, may be null * @return the concatenated String, <code>null</code> if null array input * @deprecated Use the better named {@link #join(Object[])} instead. * Method will be removed in Commons Lang 3.0. */ public static String concatenate(Object[] array) { return join(array, null); } /** * <p>Joins the elements of the provided array into a single String * containing the provided list of elements.</p> * * <p>No separator is added to the joined String. * Null objects or empty strings within the array are represented by * empty strings.</p> * * <pre> * StringUtils.join(null) = null * StringUtils.join([]) = "" * StringUtils.join([null]) = "" * StringUtils.join(["a", "b", "c"]) = "abc" * StringUtils.join([null, "", "a"]) = "a" * </pre> * * @param array the array of values to join together, may be null * @return the joined String, <code>null</code> if null array input * @since 2.0 */ public static String join(Object[] array) { return join(array, null); } /** * <p>Joins the elements of the provided array into a single String * containing the provided list of elements.</p> * * <p>No delimiter is added before or after the list. 
* Null objects or empty strings within the array are represented by * empty strings.</p> * * <pre> * StringUtils.join(null, *) = null * StringUtils.join([], *) = "" * StringUtils.join([null], *) = "" * StringUtils.join(["a", "b", "c"], ';') = "a;b;c" * StringUtils.join(["a", "b", "c"], null) = "abc" * StringUtils.join([null, "", "a"], ';') = ";;a" * </pre> * * @param array the array of values to join together, may be null * @param separator the separator character to use * @return the joined String, <code>null</code> if null array input * @since 2.0 */ public static String join(Object[] array, char separator) { if (array == null) { return null; } return join(array, separator, 0, array.length); } /** * <p>Joins the elements of the provided array into a single String * containing the provided list of elements.</p> * * <p>No delimiter is added before or after the list. * Null objects or empty strings within the array are represented by * empty strings.</p> * * <pre> * StringUtils.join(null, *) = null * StringUtils.join([], *) = "" * StringUtils.join([null], *) = "" * StringUtils.join(["a", "b", "c"], ';') = "a;b;c" * StringUtils.join(["a", "b", "c"], null) = "abc" * StringUtils.join([null, "", "a"], ';') = ";;a" * </pre> * * @param array the array of values to join together, may be null * @param separator the separator character to use * @param startIndex the first index to start joining from. It is * an error to pass in an end index past the end of the array * @param endIndex the index to stop joining from (exclusive). It is * an error to pass in an end index past the end of the array * @return the joined String, <code>null</code> if null array input * @since 2.0 */ public static String join(Object[] array, char separator, int startIndex, int endIndex) { if (array == null) { return null; } int bufSize = (endIndex - startIndex); if (bufSize <= 0) { return EMPTY; } bufSize *= ((array[startIndex] == null ? 
16 : array[startIndex].toString().length()) + 1); StringBuffer buf = new StringBuffer(bufSize); for (int i = startIndex; i < endIndex; i++) { if (i > startIndex) { buf.append(separator); } if (array[i] != null) { buf.append(array[i]); } } return buf.toString(); } /** * <p>Joins the elements of the provided array into a single String * containing the provided list of elements.</p> * * <p>No delimiter is added before or after the list. * A <code>null</code> separator is the same as an empty String (""). * Null objects or empty strings within the array are represented by * empty strings.</p> * * <pre> * StringUtils.join(null, *) = null * StringUtils.join([], *) = "" * StringUtils.join([null], *) = "" * StringUtils.join(["a", "b", "c"], "--") = "a--b--c" * StringUtils.join(["a", "b", "c"], null) = "abc" * StringUtils.join(["a", "b", "c"], "") = "abc" * StringUtils.join([null, "", "a"], ',') = ",,a" * </pre> * * @param array the array of values to join together, may be null * @param separator the separator character to use, null treated as "" * @return the joined String, <code>null</code> if null array input */ public static String join(Object[] array, String separator) { if (array == null) { return null; } return join(array, separator, 0, array.length); } /** * <p>Joins the elements of the provided array into a single String * containing the provided list of elements.</p> * * <p>No delimiter is added before or after the list. * A <code>null</code> separator is the same as an empty String (""). 
* Null objects or empty strings within the array are represented by * empty strings.</p> * * <pre> * StringUtils.join(null, *) = null * StringUtils.join([], *) = "" * StringUtils.join([null], *) = "" * StringUtils.join(["a", "b", "c"], "--") = "a--b--c" * StringUtils.join(["a", "b", "c"], null) = "abc" * StringUtils.join(["a", "b", "c"], "") = "abc" * StringUtils.join([null, "", "a"], ',') = ",,a" * </pre> * * @param array the array of values to join together, may be null * @param separator the separator character to use, null treated as "" * @param startIndex the first index to start joining from. It is * an error to pass in an end index past the end of the array * @param endIndex the index to stop joining from (exclusive). It is * an error to pass in an end index past the end of the array * @return the joined String, <code>null</code> if null array input */ public static String join(Object[] array, String separator, int startIndex, int endIndex) { if (array == null) { return null; } if (separator == null) { separator = EMPTY; } // endIndex - startIndex > 0: Len = NofStrings *(len(firstString) + len(separator)) // (Assuming that all Strings are roughly equally long) int bufSize = (endIndex - startIndex); if (bufSize <= 0) { return EMPTY; } bufSize *= ((array[startIndex] == null ? 16 : array[startIndex].toString().length()) + separator.length()); StringBuffer buf = new StringBuffer(bufSize); for (int i = startIndex; i < endIndex; i++) { if (i > startIndex) { buf.append(separator); } if (array[i] != null) { buf.append(array[i]); } } return buf.toString(); } /** * <p>Joins the elements of the provided <code>Iterator</code> into * a single String containing the provided elements.</p> * * <p>No delimiter is added before or after the list. Null objects or empty * strings within the iteration are represented by empty strings.</p> * * <p>See the examples here: {@link #join(Object[],char)}. 
</p> * * @param iterator the <code>Iterator</code> of values to join together, may be null * @param separator the separator character to use * @return the joined String, <code>null</code> if null iterator input * @since 2.0 */ public static String join(Iterator iterator, char separator) { // handle null, zero and one elements before building a buffer if (iterator == null) { return null; } if (!iterator.hasNext()) { return EMPTY; } Object first = iterator.next(); if (!iterator.hasNext()) { return ObjectUtils.toString(first); } // two or more elements StringBuffer buf = new StringBuffer(256); // Java default is 16, probably too small if (first != null) { buf.append(first); } while (iterator.hasNext()) { buf.append(separator); Object obj = iterator.next(); if (obj != null) { buf.append(obj); } } return buf.toString(); } /** * <p>Joins the elements of the provided <code>Iterator</code> into * a single String containing the provided elements.</p> * * <p>No delimiter is added before or after the list. * A <code>null</code> separator is the same as an empty String ("").</p> * * <p>See the examples here: {@link #join(Object[],String)}. 
</p> * * @param iterator the <code>Iterator</code> of values to join together, may be null * @param separator the separator character to use, null treated as "" * @return the joined String, <code>null</code> if null iterator input */ public static String join(Iterator iterator, String separator) { // handle null, zero and one elements before building a buffer if (iterator == null) { return null; } if (!iterator.hasNext()) { return EMPTY; } Object first = iterator.next(); if (!iterator.hasNext()) { return ObjectUtils.toString(first); } // two or more elements StringBuffer buf = new StringBuffer(256); // Java default is 16, probably too small if (first != null) { buf.append(first); } while (iterator.hasNext()) { if (separator != null) { buf.append(separator); } Object obj = iterator.next(); if (obj != null) { buf.append(obj); } } return buf.toString(); } /** * <p>Joins the elements of the provided <code>Collection</code> into * a single String containing the provided elements.</p> * * <p>No delimiter is added before or after the list. Null objects or empty * strings within the iteration are represented by empty strings.</p> * * <p>See the examples here: {@link #join(Object[],char)}. </p> * * @param collection the <code>Collection</code> of values to join together, may be null * @param separator the separator character to use * @return the joined String, <code>null</code> if null iterator input * @since 2.3 */ public static String join(Collection collection, char separator) { if (collection == null) { return null; } return join(collection.iterator(), separator); } /** * <p>Joins the elements of the provided <code>Collection</code> into * a single String containing the provided elements.</p> * * <p>No delimiter is added before or after the list. * A <code>null</code> separator is the same as an empty String ("").</p> * * <p>See the examples here: {@link #join(Object[],String)}. 
</p> * * @param collection the <code>Collection</code> of values to join together, may be null * @param separator the separator character to use, null treated as "" * @return the joined String, <code>null</code> if null iterator input * @since 2.3 */ public static String join(Collection collection, String separator) { if (collection == null) { return null; } return join(collection.iterator(), separator); }
/* Join the items of seq into a new Unicode object, with separator between
 * consecutive items.
 *
 * separator may be NULL, in which case a single blank is used.  Each item
 * must be a str or Unicode object (anything else raises TypeError) and is
 * converted with PyUnicode_FromObject().  An empty sequence yields an
 * empty Unicode object; a one-item sequence whose item is an exact Unicode
 * object yields that item itself.
 *
 * Returns a new reference, or NULL with an exception set on failure.
 */
PyObject *
PyUnicode_Join(PyObject *separator, PyObject *seq)
{
    PyObject *internal_separator = NULL;
    const Py_UNICODE blank = ' ';
    const Py_UNICODE *sep = &blank;
    Py_ssize_t seplen = 1;
    PyUnicodeObject *res = NULL; /* the result */
    Py_ssize_t res_alloc = 100;  /* # Py_UNICODE units allocated in res */
    Py_ssize_t res_used;         /* # Py_UNICODE units used so far */
    Py_UNICODE *res_p;           /* pointer to the first free unit in res */
    PyObject *fseq;              /* PySequence_Fast(seq) */
    Py_ssize_t seqlen;           /* len(fseq) -- number of items in sequence */
    PyObject *item;
    Py_ssize_t i;

    fseq = PySequence_Fast(seq, "");
    if (fseq == NULL) {
        return NULL;
    }

    /* Grrrr.  A codec may be invoked to convert str objects to
     * Unicode, and so it's possible to call back into Python code
     * during PyUnicode_FromObject(), and so it's possible for a sick
     * codec to change the size of fseq (if seq is a list).  Therefore
     * we have to keep refetching the size -- can't assume seqlen
     * is invariant.
     */
    seqlen = PySequence_Fast_GET_SIZE(fseq);
    /* If empty sequence, return u"". */
    if (seqlen == 0) {
        res = _PyUnicode_New(0);  /* empty sequence; return u"" */
        goto Done;
    }
    /* If singleton sequence with an exact Unicode, return that. */
    if (seqlen == 1) {
        item = PySequence_Fast_GET_ITEM(fseq, 0);
        if (PyUnicode_CheckExact(item)) {
            Py_INCREF(item);
            res = (PyUnicodeObject *)item;
            goto Done;
        }
    }

    /* At least two items to join, or one that isn't exact Unicode. */
    if (seqlen > 1) {
        /* Set up sep and seplen -- they're needed. */
        if (separator == NULL) {
            sep = &blank;
            seplen = 1;
        }
        else {
            internal_separator = PyUnicode_FromObject(separator);
            if (internal_separator == NULL)
                goto onError;
            sep = PyUnicode_AS_UNICODE(internal_separator);
            seplen = PyUnicode_GET_SIZE(internal_separator);
            /* In case PyUnicode_FromObject() mutated seq. */
            seqlen = PySequence_Fast_GET_SIZE(fseq);
        }
    }

    /* Get space. */
    res = _PyUnicode_New(res_alloc);
    if (res == NULL)
        goto onError;
    res_p = PyUnicode_AS_UNICODE(res);
    res_used = 0;

    for (i = 0; i < seqlen; ++i) {
        Py_ssize_t itemlen;
        Py_ssize_t new_res_used;

        item = PySequence_Fast_GET_ITEM(fseq, i);
        /* Convert item to Unicode. */
        if (! PyUnicode_Check(item) && ! PyString_Check(item)) {
            PyErr_Format(PyExc_TypeError,
                         "sequence item %zd: expected string or Unicode,"
                         " %.80s found",
                         i, item->ob_type->tp_name);
            goto onError;
        }
        item = PyUnicode_FromObject(item);
        if (item == NULL)
            goto onError;
        /* We own a reference to item from here on. */

        /* In case PyUnicode_FromObject() mutated seq. */
        seqlen = PySequence_Fast_GET_SIZE(fseq);

        /* Make sure we have enough space for the separator and the item. */
        itemlen = PyUnicode_GET_SIZE(item);
        new_res_used = res_used + itemlen;
        if (new_res_used < 0)
            goto Overflow;
        if (i < seqlen - 1) {
            new_res_used += seplen;
            if (new_res_used < 0)
                goto Overflow;
        }
        if (new_res_used > res_alloc) {
            /* double allocated size until it's big enough */
            do {
                res_alloc += res_alloc;
                if (res_alloc <= 0)
                    goto Overflow;
            } while (new_res_used > res_alloc);
            if (_PyUnicode_Resize(&res, res_alloc) < 0) {
                Py_DECREF(item);
                goto onError;
            }
            /* The resize may have moved the buffer; re-derive the
             * write position from the new base. */
            res_p = PyUnicode_AS_UNICODE(res) + res_used;
        }

        /* Copy item, and maybe the separator. */
        Py_UNICODE_COPY(res_p, PyUnicode_AS_UNICODE(item), itemlen);
        res_p += itemlen;
        if (i < seqlen - 1) {
            Py_UNICODE_COPY(res_p, sep, seplen);
            res_p += seplen;
        }
        Py_DECREF(item);
        res_used = new_res_used;
    }

    /* Shrink res to match the used area; this probably can't fail,
     * but it's cheap to check.
     */
    if (_PyUnicode_Resize(&res, res_used) < 0)
        goto onError;

 Done:
    Py_XDECREF(internal_separator);
    Py_DECREF(fseq);
    return (PyObject *)res;

 Overflow:
    PyErr_SetString(PyExc_OverflowError,
                    "join() result is too long for a Python string");
    /* Every jump to Overflow happens while we still own item. */
    Py_DECREF(item);
    /* fall through */

 onError:
    Py_XDECREF(internal_separator);
    Py_DECREF(fseq);
    Py_XDECREF(res);
    return NULL;
}
/* Documentation text for S.join(sequence), declared through PyDoc_STRVAR
 * under the name join__doc__. */
PyDoc_STRVAR(join__doc__,
"S.join(sequence) -> string\n\
\n\
Return a string which is the concatenation of the strings in the\n\
sequence.  The separator between elements is S.");
/* Join the items of orig into a new string, using self as the separator.
 *
 * orig is first turned into a fast sequence.  An empty sequence gives "";
 * a one-item sequence holding an exact str or Unicode object gives that
 * object itself.  Otherwise a first pass checks every item and totals the
 * space needed, handing the whole job to PyUnicode_Join() as soon as a
 * Unicode item turns up, and a second pass copies items and separators
 * into a string allocated at exactly that size.
 *
 * Returns a new reference, or NULL with TypeError (an item is neither str
 * nor Unicode) or OverflowError (the total length does not fit) set.
 */
static PyObject *
string_join(PyStringObject *self, PyObject *orig)
{
    char *delim = PyString_AS_STRING(self);
    const Py_ssize_t delim_len = PyString_GET_SIZE(self);
    PyObject *joined = NULL;
    PyObject *fast, *elem;
    Py_ssize_t count = 0;
    Py_ssize_t idx;
    size_t total = 0;
    char *out;

    fast = PySequence_Fast(orig, "");
    if (fast == NULL)
        return NULL;

    count = PySequence_Size(fast);
    if (count == 0) {
        Py_DECREF(fast);
        return PyString_FromString("");
    }
    if (count == 1) {
        elem = PySequence_Fast_GET_ITEM(fast, 0);
        if (PyString_CheckExact(elem) || PyUnicode_CheckExact(elem)) {
            Py_INCREF(elem);
            joined = elem;
            goto done;
        }
    }

    /* From here on there are at least two things to join, or a single
     * item that is a subclass of the builtin types.
     * Pre-pass: total up the space required, reject absurd arguments,
     * and defer to the Unicode join when a Unicode item is present.
     */
    for (idx = 0; idx < count; idx++) {
        const size_t before = total;

        elem = PySequence_Fast_GET_ITEM(fast, idx);
        if (!PyString_Check(elem)) {
#ifdef Py_USING_UNICODE
            if (PyUnicode_Check(elem)) {
                /* Defer to Unicode join.
                 * CAUTION:  There's no guarantee that the
                 * original sequence can be iterated over
                 * again, so we must pass the fast sequence.
                 */
                joined = PyUnicode_Join((PyObject *)self, fast);
                goto done;
            }
#endif
            PyErr_Format(PyExc_TypeError,
                         "sequence item %zd: expected string,"
                         " %.80s found",
                         idx, elem->ob_type->tp_name);
            goto done;          /* joined is still NULL */
        }

        total += PyString_GET_SIZE(elem);
        if (idx != 0)
            total += delim_len;
        /* total is unsigned: a wrapped sum shows up as a decrease. */
        if (total < before || total > PY_SSIZE_T_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                "join() result is too long for a Python string");
            goto done;          /* joined is still NULL */
        }
    }

    /* Allocate result space. */
    joined = PyString_FromStringAndSize((char*)NULL, total);
    if (joined == NULL)
        goto done;

    /* Catenate everything: a separator goes in front of every item
     * except the first.
     */
    out = PyString_AS_STRING(joined);
    for (idx = 0; idx < count; ++idx) {
        size_t n;

        elem = PySequence_Fast_GET_ITEM(fast, idx);
        if (idx > 0) {
            Py_MEMCPY(out, delim, delim_len);
            out += delim_len;
        }
        n = PyString_GET_SIZE(elem);
        Py_MEMCPY(out, PyString_AS_STRING(elem), n);
        out += n;
    }

 done:
    Py_DECREF(fast);
    return joined;
}