Source code for tethne.networks.authors

"""
Methods for generating networks in which authors are vertices.

Methods
```````

.. autosummary::

   author_cocitation
   author_coinstitution
   author_institution
   author_papers
   coauthors

"""

import networkx as nx
import tethne.utilities as util
import tethne.data as ds
from collections import defaultdict, Counter

# Module-level debug switch, checked before the diagnostic print statements
# in this module.
# 0 prints nothing in the console.
# 1 prints all print statements in the console.
DEBUG = 0

def author_papers(papers, node_id='ayjid', paper_attribs=[], **kwargs):
    """
    Generate an author_papers network NetworkX directed graph.

    ==============     =========================================================
    Element            Description
    ==============     =========================================================
    Node               Two kinds of nodes with distinguishing "type" attributes:
                       * type = paper    - a paper in papers
                       * type = person   - a person in papers
                       Papers node attributes defined by paper_attribs.
    Edge               Directed, Author -> his/her Paper, with the paper's
                       'date' as an edge attribute.
    ==============     =========================================================

    Parameters
    ----------
    papers : list
        A list of :class:`.Paper` instances.
    node_id : string
        A key from :class:`.Paper` used to identify the paper nodes. Any key
        except 'citations' is accepted.
    paper_attribs : list
        Keys of each paper that are selected with ``util.subdict`` and
        attached to the corresponding paper node as attributes. The default
        list is never modified.
    **kwargs
        Accepted for call compatibility; not used.

    Returns
    -------
    author_papers_graph : networkx.DiGraph
        A DiGraph 'author_papers_graph'.

    Raises
    ------
    KeyError : Raised when node_id is not present in Papers.

    """
    author_papers_graph = nx.DiGraph(type='author_papers')

    # Validate node_id against the keys of an empty Paper. 'citations' is
    # excluded because it cannot identify a paper.
    valid_ids = [key for key in ds.Paper().keys() if key != 'citations']
    if node_id not in valid_ids:
        message = 'node_id {0!r} cannot be used to identify papers.'
        raise KeyError(message.format(node_id))

    for entry in papers:
        # Paper node: user-selected attributes plus the distinguishing type.
        paper_attrib_dict = util.subdict(entry, paper_attribs)
        paper_attrib_dict['type'] = 'paper'
        # Positional attribute dict: this module uses the networkx 1.x API.
        author_papers_graph.add_node(entry[node_id], paper_attrib_dict)

        # A paper without author names still gets its node, but there is
        # nobody to connect it to (coauthors() applies the same guard).
        if entry['aulast'] is None:
            continue

        authors = util.concat_list(entry['aulast'], entry['auinit'], ' ')
        for author in authors:
            # Person node, then the Author -> Paper edge.
            author_papers_graph.add_node(author, type="person")
            author_papers_graph.add_edge(author, entry[node_id],
                                         date=entry['date'])

    return author_papers_graph

def coauthors(papers, threshold=1, edge_attribs=['ayjid'], **kwargs):
    """
    Generate a co-author network.

    As the name suggests, edges are drawn between two author-vertices in the
    case that those authors published a paper together. Co-authorship networks
    are popular models for studying patterns of collaboration in scientific
    communities.

    Author institutional affiliation is included as a node attribute, if
    possible.

    To generate a co-authorship network, use the
    :func:`.networks.authors.coauthors` method:

    .. code-block:: python

       >>> CA = nt.authors.coauthors(papers)
       >>> CA
       <networkx.classes.graph.Graph object at 0x101705650>

    ==============     =========================================================
    Element            Description
    ==============     =========================================================
    Node               Author name.
    Node attribute     'institution', the institution most often associated
                       with the author (only when affiliations are available).
    Edges              (a,b) in E(G) if a and b are coauthors on the same paper.
    Edge attribute     'weight', the number of papers a and b co-authored, plus
                       one list per key in edge_attribs holding that key's
                       value for each co-authored paper.
    ==============     =========================================================

    Parameters
    ----------
    papers : list
        A list of :class:`Paper` instances.
    threshold : int
        Minimum number of co-authored papers required for an edge.
        (default: 1)
    edge_attribs : list
        List of edge_attributes specifying which :class:`.Paper` keys (from the
        co-authored paper) to use as edge attributes. (default: ['ayjid'])
        The default list is never modified.
    **kwargs
        Accepted for call compatibility; not used.

    Returns
    -------
    G : networkx.Graph
        A co-authorship network.

    """

    # TODO: Check whether papers contains :class:`.Paper` instances, and raise
    #  an exception if not.

    G = nx.Graph(type='coauthors')
    coauthor_dict = {}  # (author_a, author_b) -> edge attribute dict
    author_inst = {}    # author name -> institutions seen across all papers

    for entry in papers:
        if entry['aulast'] is None:
            continue

        # Attributes (chosen by the caller) contributed by this paper to every
        # edge it creates or reinforces.
        edge_att = util.subdict(entry, edge_attribs)
        full_names = util.concat_list(entry['aulast'], entry['auinit'], ' ')
        institutions = entry['institutions']

        for a, name_a in enumerate(full_names):
            # Update the global author-institution mapping. The values are
            # copied into a list owned by this function so that the papers'
            # own data is never extended in place.
            # NOTE(review): presumably each value is a list of institution
            # names -- confirm against the Paper schema.
            if institutions is not None:
                try:
                    affiliations = institutions[name_a]
                except KeyError:
                    pass
                else:
                    author_inst.setdefault(name_a, []).extend(affiliations)

            # Pair this author with every later author of the same paper.
            # The bound is full_names itself, the sequence being indexed.
            for name_b in full_names[a + 1:]:
                pair = (name_a, name_b)

                # The pair may already be stored in either order.
                record = coauthor_dict.get(pair)
                if record is None:
                    record = coauthor_dict.get((name_b, name_a))

                if record is None:
                    # First encounter for this author-pair.
                    record = {k: [v] for k, v in edge_att.items()}
                    record['weight'] = 1
                    coauthor_dict[pair] = record
                else:
                    for key, value in edge_att.items():
                        record.setdefault(key, []).append(value)
                    record['weight'] += 1

    # Add edges with specified edge attributes.
    for pair, attrs in coauthor_dict.items():
        if attrs['weight'] >= threshold:
            G.add_edge(pair[0], pair[1], attr_dict=attrs)

    # Include institutional affiliations as node attributes, if possible.
    #
    # Find most likely institution for each author. This won't work well if the
    #  author only occurs once in the dataset and there was no explicit
    #  author-institution mapping.
    for name, affiliations in author_inst.items():
        # Authors without coauthors (or below threshold) are not in G, and an
        # empty affiliation list has no most common element.
        if not affiliations or name not in G:
            continue
        top_inst = Counter(affiliations).most_common(1)[0][0]
        G.node[name]['institution'] = top_inst

    return G

def author_institution(Papers, edge_attribs=[], **kwargs):
    """
    Generate a bi-partite graph connecting authors and their institutions.

    This may be slightly ambiguous for WoS data where there is no explicit
    author-institution mapping. Edge weights are the number of co-associations
    between an author and an institution, which should help resolve this
    ambiguity (the more data the better).

    ==============     =========================================================
    Element            Description
    ==============     =========================================================
    Node               Two kinds of nodes with distinguishing "type" attributes:
                       * type = author       - an author name
                       * type = institution  - an institution name
    Edge               (a, i) if author a is associated with institution i in
                       a paper. One edge is added per paper, so the same pair
                       may be connected by several parallel edges.
    Edge attribute     'weight', the number of times i is listed for a in that
                       paper, plus the paper's values for edge_attribs.
    ==============     =========================================================

    Parameters
    ----------
    Papers : list
        A list of :class:`.Paper` instances.
    edge_attribs : list
        List of edge_attributes specifying which :class:`.Paper` keys (from the
        authored paper) to use as edge attributes. For example, the 'date' key
        in :class:`.Paper` . The default list is never modified.
    **kwargs
        Accepted for call compatibility; not used.

    Returns
    -------
    author_institution_graph : networkx.MultiGraph
        A graph describing institutional affiliations of authors in the corpus.
    """

    author_institution_graph = nx.MultiGraph(type='author_institution')

    # The field in Papers which corresponds to authors and affiliated
    # institutions is "institutions":
    #   { 'institutions' : { author: [institutions_list] } }
    for paper in Papers:
        auth_inst = paper['institutions']
        if auth_inst is None:
            continue

        edge_attrib_dict = util.subdict(paper, edge_attribs)
        for au, institutions in auth_inst.items():
            # Add node of type 'author'.
            author_institution_graph.add_node(au, type='author')

            # Count repeated listings of an institution within this paper;
            # the count becomes the weight of this paper's edge.
            for ins_str, count in Counter(institutions).items():
                # Add node of type 'institution'.
                author_institution_graph.add_node(ins_str, type='institution')
                author_institution_graph.add_edge(au, ins_str,
                                                  attr_dict=edge_attrib_dict,
                                                  weight=count)

    return author_institution_graph

def author_coinstitution(Papers, threshold=1, **kwargs):
    """
    Generate a co-institution graph, where edges indicate shared affiliation.

    Some bibliographic datasets, including data from the Web of Science,
    include the institutional affiliations of authors. In a co-institution
    graph, two authors (vertices) have an edge between them if they share an
    institutional affiliation in the dataset. Note that data about institutional
    affiliations varies in the WoS database so this will yield more reliable
    results for more recent publications.

    To generate a co-institution network, use the
    :func:`.networks.authors.author_coinstitution` method:

    .. code-block:: python

       >>> ACI = nt.authors.author_coinstitution(papers)
       >>> ACI
       <networkx.classes.graph.Graph object at 0x106571190>

    ==============     =========================================================
    Element            Description
    ==============     =========================================================
    Node               Authors.
    Node Attribute     type (string). Always 'author'.
    Edges              (a, b) where a and b are affiliated with the same
                       institution.
    Edge attribute     overlap (int). number of shared institutions.
    ==============     =========================================================

    Parameters
    ----------
    Papers : list
        A list of :class:`.Paper` instances.
    threshold : int
        Minimum institutional overlap required for an edge.
    **kwargs
        Accepted for call compatibility; not used.

    Returns
    -------
    coinstitution : NetworkX :class:`.graph`
        A coinstitution network. Only authors that share at least
        ``threshold`` institutions with some other author appear as nodes.

    """
    coinstitution = nx.Graph(type='author_coinstitution')

    # The field in Papers which corresponds to the affiliation is
    # "institutions":  { 'institutions' : { author: [institutions_list] } }
    #
    # Pool every author's institutions over the whole corpus first. Sets owned
    # by this function are used so that the papers' own lists are never
    # extended in place.
    author_institutions = {}  # keys: author names, values: set of institutions
    for paper in Papers:
        if paper['institutions'] is None:
            continue
        for author, institutions in paper['institutions'].items():
            author_institutions.setdefault(author, set()).update(institutions)

    # Compare every unordered pair of authors exactly once, after all papers
    # have been read (the graph is undirected, so (a, b) covers (b, a)).
    authors = list(author_institutions)
    for i, first in enumerate(authors):
        for second in authors[i + 1:]:
            shared = len(author_institutions[first]
                         & author_institutions[second])
            if shared >= threshold:
                coinstitution.add_edge(first, second, overlap=shared)

    # Tag every node as an author. Re-adding an existing node only updates
    # its attributes.
    for node in list(coinstitution.nodes()):
        coinstitution.add_node(node, type='author')

    return coinstitution

def author_cocitation(papers, threshold=1, **kwargs):
    """
    Generates an author co-citation network; edges indicate co-citation of
    authors' papers.

    Similar to :func:`.papers.cocitation`, except that vertices are authors
    rather than papers. To generate an author co-citation network, use the
    :func:`.networks.authors.author_cocitation` method:

    .. code-block:: python

       >>> ACC = nt.authors.author_cocitation(papers)
       >>> ACC
       <networkx.classes.graph.Graph object at 0x106571190>

    ==============     =========================================================
    Element            Description
    ==============     =========================================================
    Nodes              Author name, upper-cased ("LAST INITIALS").
    Edge               (a, b) if a and b are referenced by the same paper in
                       papers
    Edge attribute     'weight', the number of papers that co-cite a and b.
    ==============     =========================================================

    Parameters
    ----------
    papers : list
        a list of :class:`.Paper` objects.
    threshold : int
        Minimum number of co-citations required to create an edge between
        authors.
    **kwargs
        Accepted for call compatibility; not used.

    Returns
    -------
    author_cocitations : :class:`.networkx.Graph`
        A cocitation network.

    """

    author_cocitations = nx.Graph(type='author_cocitation')

    # Keys are (author, author) tuples; values are the number of papers that
    # cite both authors. A pair is stored in whichever order it was first
    # seen, so both orders are checked on lookup.
    cocitations = {}

    for paper in papers:
        citations = paper['citations']
        # Some papers don't have citations, and a single citation cannot
        # produce a co-citation.
        if citations is None or len(citations) < 2:
            continue

        # Build each cited author's label once per paper.
        names = [_cited_author_name(citation) for citation in citations]

        # Pairs already counted for this paper: a paper contributes at most
        # one co-citation per author pair, however many works it cites.
        counted = set()

        for i, author_i in enumerate(names):
            # Start at i+1 to avoid redundancy and self-pairing by position.
            for author_j in names[i + 1:]:
                authors_pair = (author_i, author_j)
                authors_pair_inv = (author_j, author_i)

                if DEBUG:
                    print("%s %s" % ("Authors Pairs: ", authors_pair))
                    print("%s %s" % ("Authors Pairs inv: ", authors_pair_inv))

                # Two cited works by the same author: no self-loop.
                if author_i == author_j:
                    continue

                # Use the stored orientation if the pair was seen before.
                key = authors_pair
                if key not in cocitations and authors_pair_inv in cocitations:
                    key = authors_pair_inv

                if key in counted:
                    continue
                counted.add(key)
                cocitations[key] = cocitations.get(key, 0) + 1

    # Create the network.
    for key, val in cocitations.items():
        # Keep only pairs co-cited at least `threshold` times.
        if val >= threshold:
            author_cocitations.add_edge(key[0], key[1], weight=val)

    return author_cocitations


def _cited_author_name(citation, delim=' '):
    """Return the upper-cased "LAST INITIALS" label for one cited paper.

    The 'aulast' and 'auinit' fields are each joined element by element, so a
    plain string and a list of strings produce the same label.
    """
    last = ''.join(map(str, citation['aulast']))
    initials = ''.join(map(str, citation['auinit']))
    return (last + delim + initials).upper()
Navigation

Source code for tethne.networks.authors

Quick search

Navigation