Supplementary material for: Zhang, Yutong. 2021. A Phonological Analysis of the Word-Borrowing Process in Volapük. Senior Honors Thesis for Linguistics Program, Washington University in St. Louis.

This file contains all commands produced in Python for the descriptive analysis in Section 4.2 of the thesis, as mentioned in Methods (Section 2).

In [1]:
import pandas as pd
from collections import Counter
In [2]:
# Relative path of the Excel workbook that holds the word list.
sExcelFile = "./source/wordlist.xlsx"
In [3]:
# Read the worksheet named 'Sheet1' of the workbook into a DataFrame.
df = pd.read_excel(io=sExcelFile, sheet_name='Sheet1')
In [4]:
# Table dimensions: number of rows and number of columns.
nrows, ncols = df.shape
In [5]:
# Report the number of rows, i.e. the number of bases in the word list.
print('Number of bases:', nrows)
Number of bases: 1000
In [6]:
# Show the column labels of the loaded table.
print(df.columns)
Index(['English translation', 'New Vp base', 'IPA for New Vp base',
       'Source language', 'Source word', 'IPA for source word',
       'Source language 2', 'Source word 2', 'IPA for source word 2',
       'Old Vp base derived from the same source', 'IPA for Old Vp base'],
      dtype='object')
In [7]:
# Two sample cells, addressed by (row, column) position.
# Column 2 is 'IPA for New Vp base'; column 5 is 'IPA for source word'.
print(df.iat[21, 2])
print(df.iat[0, 5])
beˈvy
a
In [8]:
# Take the 'IPA for New Vp base' column and turn it into a plain Python list.
ipa_column = df['IPA for New Vp base']
list1 = ipa_column.tolist()
In [9]:
import re
In [10]:
# Bases that open with three non-vowel symbols followed by a vowel.
onset3c = [base for base in list1
           if re.search(r"^[^aeiouɛøy]{3}[aeiouɛøy]", base)]

# Show the distinct bases and how many bases matched.
print(set(onset3c))
print(len(onset3c))
{'ztral'}
1
In [11]:
# Bases that open with two non-vowel symbols followed by a vowel.
onset2c = [base for base in list1
           if re.search(r"^[^aeiouɛøy]{2}[aeiouɛøy]", base)]

# Only the count is reported for this group.
print(len(onset2c))
238
In [12]:
# Bases that open with one non-vowel symbol followed by a vowel.
onset1c = [base for base in list1
           if re.search(r"^[^aeiouɛøy][aeiouɛøy]", base)]

# Only the count is reported for this group.
print(len(onset1c))
736
In [13]:
# Bases whose first symbol is a vowel.
onsetv = [base for base in list1 if re.search(r"^[aeiouɛøy]", base)]

# Show the distinct bases and how many bases matched.
print(set(onsetv))
print(len(onsetv))
{'u', 'al', 'ɛz', 'if', 'ab', 'eˈkø', 'yf', 'el', 'ek', 'iz', 'i', 'y', 'e', 'øn', 'ɛ', 'az', 'aˈi', 'a', 'yn', 'eˈbo', 'in', 'ad', 'an', 'iˈbo', 'øm'}
25
In [14]:
# Bases that end in a vowel followed by two non-vowel symbols.
coda2c = [base for base in list1
          if re.search(r"[aeiouɛøy][^aeiouɛøy]{2}$", base)]

# Show the distinct bases and how many bases matched.
print(set(coda2c))
print(len(coda2c))
{'bɛld', 'zvizt', 'dʒɛrm', 'tozt', 'turn', 'bank', 'fɛrm', 'tɛˈlɛkt', 'rizk', 'ɡold', 'larˈɡent', 'mazt', 'pold', 'lɛrn', 'zirk', 'dizt', 'horn', 'koˈmand', 'proˈtezt', 'virɡ', 'fund', 'lɛmp', 'volf', 'muzk', 'konˈzɛrt', 'kult'}
26
In [15]:
# Bases that end in a vowel followed by one non-vowel symbol.
coda1c = [base for base in list1
          if re.search(r"[aeiouɛøy][^aeiouɛøy]$", base)]

# Only the count is reported for this group.
print(len(coda1c))
894
In [16]:
# Bases whose last symbol is a vowel.
codav = [base for base in list1 if re.search(r"[aeiouɛøy]$", base)]

# Show the distinct bases and how many bases matched.
print(set(codav))
print(len(codav))
{'u', 'beˈvy', 'klu', 'viˈzy', 'na', 'fe', 'dy', 'viˈo', 'to', 'aˈi', 'ko', 'lo', 'ʒy', 'zy', 'ply', 'by', 'zo', 'plu', 'me', 'dzy', 'vø', 'he', 'dza', 'vo', 'a', 'de', 've', 'do', 'te', 'ni', 'dze', 'ba', 'iˈbo', 'bo', 'mu', 'døˈnu', 'ɡa', 'vy', 'mø', 'pu', 'no', 'i', 'baˈi', 'ɡy', 'bi', 'pro', 'zi', 'ta', 'e', 'bu', 'mo', 'zu', 'loˈve', 'za', 'nɛˈi', 'ɡø', 'dø', 'lɛ', 'ja', 'je', 'ma', 'pla', 'lø', 'plɛ', 'dzu', 'eˈkø', 'nu', 'du', 'fa', 'ka', 'ly', 'y', 'va', 'ɛ', 'liˈo', 'di', 'eˈbo', 'ti', 'ze', 'pø'}
80
In [17]:
# Collects the word-initial consonant (cluster) of every base.
onset1 = []


def is_consonant(s):
    """Return False if ``s`` is one of the eight vowel symbols, else True.

    Anything that is not exactly one of those symbols counts as a
    consonant here, including the stress mark and the empty string.
    """
    return s not in ("a", "e", "i", "o", "u", "y", "ø", "ɛ")
    
def _initial_cluster(word):
    """Return the symbols of ``word`` that precede its first vowel."""
    cluster = ""
    for symbol in word:
        if not is_consonant(symbol):
            break
        cluster += symbol
    return cluster


# Word-initial consonant (cluster) of every base; '' when the base starts
# with a vowel.
onset1 = [_initial_cluster(word) for word in list1]

# Tally the onsets and list them from most to least frequent.
count_onset1 = Counter(onset1)
count_onset1.most_common()
Out[17]:
[('z', 83),
 ('l', 72),
 ('d', 69),
 ('m', 68),
 ('k', 60),
 ('v', 54),
 ('f', 52),
 ('t', 47),
 ('n', 44),
 ('b', 43),
 ('p', 40),
 ('zt', 32),
 ('ɡ', 31),
 ('r', 28),
 ('', 25),
 ('ʒ', 20),
 ('pl', 19),
 ('pr', 19),
 ('dʒ', 18),
 ('dz', 18),
 ('kl', 16),
 ('j', 16),
 ('fl', 15),
 ('bl', 14),
 ('ɡl', 11),
 ('ɡr', 10),
 ('h', 9),
 ('kr', 8),
 ('zp', 8),
 ('dr', 7),
 ('fr', 7),
 ('zk', 7),
 ('zl', 6),
 ('zv', 6),
 ('tr', 6),
 ('br', 5),
 ('zm', 3),
 ('ɡz', 2),
 ('kv', 1),
 ('ztr', 1)]
In [18]:
# Cut every base at its consonants; the pieces left over are vowel stretches,
# which may still contain the stress mark.
consonant_cut = re.compile(r'[bdʒfɡhklmnprztvj]')
vowel1 = [consonant_cut.split(word) for word in list1]

# Flatten the per-base pieces into one list.
vowel2 = []
for pieces in vowel1:
    vowel2.extend(pieces)

# Drop empty pieces, drop pieces that are only the stress mark, then remove
# the stress mark from the remaining pieces. The result holds every single
# vowel and every vowel combination.
vowel3 = list(filter(None, vowel2))
vowel4 = list(filter(lambda piece: piece != "ˈ", vowel3))
vowel5 = ["".join(piece.split('ˈ')) for piece in vowel4]

# Cut every base at each consonant and at the stress mark, so that no piece
# contains the stress mark.
vowel_cut = re.compile(r'[bdʒfɡhklmnprztvjˈ]')
vowel6 = [vowel_cut.split(word) for word in list1]

# Flatten the per-base pieces and keep only the non-empty ones.
vowel7 = []
for pieces in vowel6:
    vowel7.extend(pieces)
vowel8 = [piece for piece in vowel7 if piece]

def split(word):
    """Return the items of ``word`` (its characters, for a string) as a list."""
    return list(word)

# Break the vowel stretches into single symbols, giving every individual
# vowel occurrence (no vowel sequences).
vowel9 = [symbol for piece in vowel8 for symbol in piece]

# Tally the vowels and list them from most to least frequent.
count_vowel = Counter(vowel9)
count_vowel.most_common()
Out[18]:
[('e', 273),
 ('i', 272),
 ('a', 227),
 ('o', 183),
 ('u', 139),
 ('ɛ', 96),
 ('y', 82),
 ('ø', 70)]
In [19]:
# Frequency table of the vowel stretches (single vowels as well as vowel
# sequences), listed from most to least frequent.
count_vowelcomb = Counter()
count_vowelcomb.update(vowel5)
count_vowelcomb.most_common()
Out[19]:
[('e', 204),
 ('i', 186),
 ('a', 185),
 ('o', 158),
 ('u', 120),
 ('ɛ', 84),
 ('y', 76),
 ('ø', 69),
 ('ei', 28),
 ('ie', 17),
 ('ai', 11),
 ('ia', 9),
 ('ea', 7),
 ('oe', 6),
 ('io', 6),
 ('oa', 5),
 ('au', 5),
 ('ui', 4),
 ('iɛ', 4),
 ('eo', 3),
 ('ɛi', 3),
 ('ya', 2),
 ('uɛ', 2),
 ('ey', 2),
 ('ue', 2),
 ('eu', 2),
 ('oi', 2),
 ('ou', 2),
 ('ye', 1),
 ('ae', 1),
 ('ao', 1),
 ('iu', 1),
 ('ua', 1),
 ('yɛ', 1),
 ('iø', 1),
 ('ɛɛ', 1)]
In [20]:
# Bases that contain two adjacent vowels, optionally separated by the
# stress mark.
vowelcomb = [base for base in list1
             if re.search(r"[aeiouyøɛ]ˈ?[aeiouyøɛ]", base)]

print(vowelcomb)

# Cut each base that has a vowel sequence at its consonants; one list of
# pieces per base. The stress mark is not a cut point, so it stays in place.
vowelc1 = [re.split(r'[bdʒfɡhklmnprztvj]', base) for base in vowelcomb]
    
def is_useful(s):
    """Return True if the fragment ``s`` holds a vowel sequence.

    A fragment counts as a vowel sequence when it contains at least two
    vowel symbols; the stress mark is ignored when counting. Single vowels,
    a lone stress mark, a single vowel with an adjacent stress mark
    (e.g. 'eˈ') and the empty string are all rejected.

    Comparing the whole fragment against a list of single characters would
    let fragments such as 'eˈ' through, although they contain only one vowel.
    """
    vowels = "aeiouyøɛ"
    # Each ch is a single character, so substring membership is exact here.
    vowel_count = sum(1 for ch in s if ch in vowels)
    return vowel_count >= 2

# Flatten the per-base pieces into one list.
vowelc2 = []
for pieces in vowelc1:
    vowelc2 += pieces

# Keep the pieces accepted by is_useful, then drop empty strings. The stress
# mark is left inside the pieces, so the sequences are shown with stress marked.
vowelc3 = [piece for piece in vowelc2 if is_useful(piece)]
vowelc4 = [piece for piece in vowelc3 if piece]

print(vowelc4)
['aˈi', 'baˈi', 'beˈit', 'biˈed', 'biˈeɡ', 'bleˈib', 'boˈad', 'byˈad', 'byˈed', 'dʒeˈin', 'daˈif', 'daˈut', 'deˈad', 'dediˈet', 'deˈim', 'diˈab', 'diˈal', 'diamaˈin', 'diaˈmet', 'dizeˈin', 'driˈen', 'duˈin', 'feˈin', 'feˈit', 'fiˈam', 'fiˈed', 'fiˈen', 'ɡaˈen', 'ɡeˈid', 'ɡeˈil', 'ʒoviˈal', 'ʒuˈit', 'kaliˈet', 'kiˈen', 'kleˈib', 'kleˈil', 'kliˈen', 'koˈed', 'konziˈen', 'kopiˈed', 'kruˈɛl', 'laˈid', 'laˈod', 'laˈut', 'leˈad', 'leˈar', 'leˈiɡ', 'leˈod', 'leˈyl', 'liˈɛn', 'liˈed', 'liˈeɡ', 'lieˈnet', 'liˈo', 'liˈun', 'loˈen', 'luˈeɡ', 'maˈif', 'mɛniˈot', 'meˈik', 'meˈuɡ', 'miteˈod', 'muˈad', 'myˈɛt', 'nɛˈi', 'nɛˈit', 'naˈud', 'neˈit', 'neˈod', 'neˈud', 'noˈat', 'noˈet', 'noˈid', 'noˈub', 'pɛnziˈon', 'peˈin', 'periˈod', 'piˈad', 'piˈan', 'piaˈnod', 'pleˈid', 'poˈed', 'proˈib', 'puˈin', 'radiˈon', 'reˈid', 'reˈiɡ', 'reˈin', 'reliˈɛf', 'zaˈid', 'zaˈil', 'zaˈit', 'zaˈun', 'zeˈad', 'zeˈat', 'zeˈid', 'zeˈif', 'zeˈil', 'zeˈim', 'zeˈiv', 'ziˈɛm', 'ziˈem', 'ziˈør', 'zoˈaf', 'zoˈal', 'zoˈar', 'zoˈel', 'zpeˈar', 'ztɛˈɛn', 'ztaˈud', 'zteˈif', 'ztoenˈzor', 'zyˈad', 'zuˈɛm', 'zuˈem', 'zviˈet', 'taˈib', 'taˈim', 'teˈat', 'tiˈɛd', 'toˈum', 'trɛˈit', 'tuˈiɡ', 'veˈiɡ', 'veˈit', 'veˈyt', 'viˈet', 'viˈo', 'dzeˈil']
['aˈi', 'aˈi', 'eˈi', 'iˈe', 'iˈe', 'eˈi', 'oˈa', 'yˈa', 'yˈe', 'eˈi', 'aˈi', 'aˈu', 'eˈa', 'iˈe', 'eˈi', 'iˈa', 'iˈa', 'ia', 'aˈi', 'iaˈ', 'eˈi', 'iˈe', 'uˈi', 'eˈi', 'eˈi', 'iˈa', 'iˈe', 'iˈe', 'aˈe', 'eˈi', 'eˈi', 'iˈa', 'uˈi', 'iˈe', 'iˈe', 'eˈi', 'eˈi', 'iˈe', 'oˈe', 'iˈe', 'iˈe', 'uˈɛ', 'aˈi', 'aˈo', 'aˈu', 'eˈa', 'eˈa', 'eˈi', 'eˈo', 'eˈy', 'iˈɛ', 'iˈe', 'iˈe', 'ieˈ', 'iˈo', 'iˈu', 'oˈe', 'uˈe', 'aˈi', 'iˈo', 'eˈi', 'eˈu', 'eˈo', 'uˈa', 'yˈɛ', 'ɛˈi', 'ɛˈi', 'aˈu', 'eˈi', 'eˈo', 'eˈu', 'oˈa', 'oˈe', 'oˈi', 'oˈu', 'iˈo', 'eˈi', 'iˈo', 'iˈa', 'iˈa', 'iaˈ', 'eˈi', 'oˈe', 'oˈi', 'uˈi', 'iˈo', 'eˈi', 'eˈi', 'eˈi', 'iˈɛ', 'aˈi', 'aˈi', 'aˈi', 'aˈu', 'eˈa', 'eˈa', 'eˈi', 'eˈi', 'eˈi', 'eˈi', 'eˈi', 'iˈɛ', 'iˈe', 'iˈø', 'oˈa', 'oˈa', 'oˈa', 'oˈe', 'eˈa', 'ɛˈɛ', 'aˈu', 'eˈi', 'oe', 'yˈa', 'uˈɛ', 'uˈe', 'iˈe', 'aˈi', 'aˈi', 'eˈa', 'iˈɛ', 'oˈu', 'ɛˈi', 'uˈi', 'eˈi', 'eˈi', 'eˈy', 'iˈe', 'iˈo', 'eˈi']
In [21]:
# Reverse every base so that its ending can be scanned from the front.
reverselist = [base[::-1] for base in list1]

# For each reversed base, take the symbols before the first vowel.
coda1 = []
for backwards in reverselist:
    cut = 0
    while cut < len(backwards) and is_consonant(backwards[cut]):
        cut += 1
    coda1.append(backwards[:cut])

# Reverse the clusters again to restore their original order. This gives the
# word-final consonant (cluster) of every base; '' when the base ends in a vowel.
coda2 = [cluster[::-1] for cluster in coda1]

# Tally the codas and list them from most to least frequent.
count_coda = Counter(coda2)
count_coda.most_common()
Out[21]:
[('d', 151),
 ('t', 128),
 ('n', 125),
 ('l', 110),
 ('', 80),
 ('m', 80),
 ('f', 61),
 ('ɡ', 53),
 ('r', 44),
 ('k', 38),
 ('b', 37),
 ('p', 32),
 ('v', 24),
 ('z', 11),
 ('zt', 5),
 ('ld', 3),
 ('rn', 3),
 ('rm', 2),
 ('nd', 2),
 ('zk', 2),
 ('nk', 1),
 ('rt', 1),
 ('lt', 1),
 ('mp', 1),
 ('nt', 1),
 ('rk', 1),
 ('kt', 1),
 ('rɡ', 1),
 ('lf', 1)]
In [22]:
# All bases that end in /z/.
ends = [base for base in list1 if re.search(r"z$", base)]

print(ends)
['az', 'ɛz', 'biz', 'daz', 'diz', 'iz', 'koz', 'lɛz', 'poz', 'ziz', 'zuz']
In [ ]: